diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,132412 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.446343779677113, + "global_step": 11000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7, + "compression_loss": 0.0, + "distillation_loss": 1.842069387435913, + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 1.7228, + "step": 1, + "task_loss": 0.6496906280517578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7000356083082708, + "compression_loss": 0.0, + "distillation_loss": 1.8232104778289795, + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 1.6991, + "step": 2, + "task_loss": 0.5824222564697266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7000712081625273, + "compression_loss": 0.0, + "distillation_loss": 1.8167695999145508, + "epoch": 0.0, + "learning_rate": 4.99999977293148e-05, + "loss": 1.6968, + "step": 3, + "task_loss": 0.6166934967041016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7001067995637733, + "compression_loss": 0.0, + "distillation_loss": 1.810304880142212, + "epoch": 0.0, + "learning_rate": 4.999999091725961e-05, + "loss": 1.6919, + "step": 4, + "task_loss": 0.6265115737915039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7001423825130124, + "compression_loss": 0.0, + "distillation_loss": 1.7883914709091187, + "epoch": 0.0, + "learning_rate": 4.999997956383567e-05, + "loss": 1.6653, + "step": 5, + "task_loss": 0.5578117370605469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7001779570112483, + "compression_loss": 0.0, + "distillation_loss": 1.4738185405731201, + "epoch": 0.01, + "learning_rate": 4.999996366904504e-05, + "loss": 1.3741, + "step": 6, + "task_loss": 0.47693514823913574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7002135230594846, + "compression_loss": 0.0, + "distillation_loss": 1.6521902084350586, + "epoch": 0.01, + "learning_rate": 4.999996366904504e-05, + "loss": 1.5516, + "step": 7, + "task_loss": 0.646312952041626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.700249080658725, + "compression_loss": 0.0, + "distillation_loss": 1.5596864223480225, + "epoch": 0.01, + "learning_rate": 4.999994323289061e-05, + "loss": 1.4611, + "step": 8, + "task_loss": 0.5743134021759033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7002846298099733, + "compression_loss": 0.0, + "distillation_loss": 1.4804778099060059, + "epoch": 0.01, + "learning_rate": 4.999991825537609e-05, + "loss": 1.3854, + "step": 9, + "task_loss": 0.5301415920257568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7003201705142331, + "compression_loss": 0.0, + "distillation_loss": 1.4417721033096313, + "epoch": 0.01, + "learning_rate": 4.999988873650602e-05, + "loss": 1.343, + "step": 10, + "task_loss": 0.4539656639099121 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.700355702772508, + "compression_loss": 0.0, + "distillation_loss": 1.2685056924819946, + "epoch": 0.01, + "learning_rate": 4.999985467628575e-05, + "loss": 1.1892, + "step": 11, + "task_loss": 0.47563648223876953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7003912265858019, + "compression_loss": 0.0, + "distillation_loss": 1.2336152791976929, + "epoch": 0.01, + "learning_rate": 4.999981607472149e-05, + "loss": 1.1549, + "step": 12, + "task_loss": 0.44687366485595703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7004267419551183, + "compression_loss": 0.0, + "distillation_loss": 1.1893235445022583, + "epoch": 0.01, + "learning_rate": 4.999977293182023e-05, + "loss": 1.1171, + "step": 13, + "task_loss": 0.4666560888290405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7004622488814609, + "compression_loss": 0.0, + "distillation_loss": 1.2539210319519043, + "epoch": 0.01, + "learning_rate": 4.999972524758982e-05, + "loss": 1.1771, + "step": 14, + "task_loss": 0.48534464836120605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7004977473658335, + "compression_loss": 0.0, + "distillation_loss": 0.7343780994415283, + "epoch": 0.01, + "learning_rate": 4.999967302203893e-05, + "loss": 0.6899, + "step": 15, + "task_loss": 0.28955867886543274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7005332374092396, + "compression_loss": 0.0, + "distillation_loss": 1.198635220527649, + "epoch": 0.02, + "learning_rate": 4.9999616255177016e-05, + "loss": 1.1238, + "step": 16, + "task_loss": 0.4500184953212738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.700568719012683, + "compression_loss": 0.0, + "distillation_loss": 0.9082566499710083, + "epoch": 0.02, + "learning_rate": 4.999955494701443e-05, + "loss": 0.851, + "step": 17, + "task_loss": 0.3353431224822998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7006041921771674, + "compression_loss": 0.0, + "distillation_loss": 1.1613315343856812, + "epoch": 0.02, + "learning_rate": 4.999948909756227e-05, + "loss": 1.0989, + "step": 18, + "task_loss": 0.5371114015579224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7006396569036965, + "compression_loss": 0.0, + "distillation_loss": 1.3335918188095093, + "epoch": 0.02, + "learning_rate": 4.9999418706832525e-05, + "loss": 1.2628, + "step": 19, + "task_loss": 0.626000165939331 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7006751131932738, + "compression_loss": 0.0, + "distillation_loss": 1.0266211032867432, + "epoch": 0.02, + "learning_rate": 4.9999343774837976e-05, + "loss": 0.9634, + "step": 20, + "task_loss": 0.39482954144477844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7007105610469031, + "compression_loss": 0.0, + "distillation_loss": 0.8103340864181519, + "epoch": 0.02, + "learning_rate": 4.999926430159223e-05, + "loss": 0.7583, + "step": 21, + "task_loss": 0.2896346151828766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7007460004655882, + "compression_loss": 0.0, + "distillation_loss": 1.3146494626998901, + "epoch": 0.02, + "learning_rate": 4.9999180287109725e-05, + "loss": 1.2389, + "step": 22, + "task_loss": 0.5570744276046753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7007814314503326, + "compression_loss": 0.0, + "distillation_loss": 0.8772894740104675, + "epoch": 0.02, + "learning_rate": 4.999909173140572e-05, + "loss": 0.8244, + "step": 23, + "task_loss": 0.3481764495372772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.70081685400214, + "compression_loss": 0.0, + "distillation_loss": 0.7861129641532898, + "epoch": 0.02, + "learning_rate": 4.999899863449631e-05, + "loss": 0.736, + "step": 24, + "task_loss": 0.2852080166339874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7008522681220143, + "compression_loss": 0.0, + "distillation_loss": 1.023297667503357, + "epoch": 0.02, + "learning_rate": 4.99989009963984e-05, + "loss": 0.9664, + "step": 25, + "task_loss": 0.45472821593284607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7008876738109588, + "compression_loss": 0.0, + "distillation_loss": 0.6092305183410645, + "epoch": 0.02, + "learning_rate": 4.999879881712973e-05, + "loss": 0.5785, + "step": 26, + "task_loss": 0.3023172616958618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7009230710699776, + "compression_loss": 0.0, + "distillation_loss": 0.6762804985046387, + "epoch": 0.03, + "learning_rate": 4.999869209670885e-05, + "loss": 0.6369, + "step": 27, + "task_loss": 0.28206872940063477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7009584599000741, + "compression_loss": 0.0, + "distillation_loss": 1.0091867446899414, + "epoch": 0.03, + "learning_rate": 4.999858083515517e-05, + "loss": 0.9509, + "step": 28, + "task_loss": 0.4264417588710785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7009938403022521, + "compression_loss": 0.0, + "distillation_loss": 0.5569751262664795, + "epoch": 0.03, + "learning_rate": 4.999846503248888e-05, + "loss": 0.5272, + "step": 29, + "task_loss": 0.2587721347808838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7010292122775152, + "compression_loss": 0.0, + "distillation_loss": 1.1096380949020386, + "epoch": 0.03, + "learning_rate": 4.9998344688731027e-05, + "loss": 1.0493, + "step": 30, + "task_loss": 0.5061087608337402 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7010645758268672, + "compression_loss": 0.0, + "distillation_loss": 1.5996776819229126, + "epoch": 0.03, + "learning_rate": 4.999821980390346e-05, + "loss": 1.5213, + "step": 31, + "task_loss": 0.8160263299942017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7010999309513116, + "compression_loss": 0.0, + "distillation_loss": 1.0707523822784424, + "epoch": 0.03, + "learning_rate": 4.999809037802888e-05, + "loss": 1.0159, + "step": 32, + "task_loss": 0.5219181776046753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7011352776518522, + "compression_loss": 0.0, + "distillation_loss": 0.9755439162254333, + "epoch": 0.03, + "learning_rate": 4.999795641113079e-05, + "loss": 0.9262, + "step": 33, + "task_loss": 0.4821498394012451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7011706159294928, + "compression_loss": 0.0, + "distillation_loss": 1.1395909786224365, + "epoch": 0.03, + "learning_rate": 4.9997817903233527e-05, + "loss": 1.1089, + "step": 34, + "task_loss": 0.8322716355323792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7012059457852369, + "compression_loss": 0.0, + "distillation_loss": 0.9883664846420288, + "epoch": 0.03, + "learning_rate": 4.999767485436224e-05, + "loss": 0.9305, + "step": 35, + "task_loss": 0.41015535593032837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7012412672200883, + "compression_loss": 0.0, + "distillation_loss": 0.8167462944984436, + "epoch": 0.03, + "learning_rate": 4.999752726454293e-05, + "loss": 0.7736, + "step": 36, + "task_loss": 0.3853650391101837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7012765802350505, + "compression_loss": 0.0, + "distillation_loss": 0.7159909009933472, + "epoch": 0.04, + "learning_rate": 4.9997375133802415e-05, + "loss": 0.6719, + "step": 37, + "task_loss": 0.27524322271347046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7013118848311274, + "compression_loss": 0.0, + "distillation_loss": 0.8945153951644897, + "epoch": 0.04, + "learning_rate": 4.999721846216831e-05, + "loss": 0.8469, + "step": 38, + "task_loss": 0.4183288812637329 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7013471810093225, + "compression_loss": 0.0, + "distillation_loss": 0.7273141145706177, + "epoch": 0.04, + "learning_rate": 4.999705724966908e-05, + "loss": 0.6819, + "step": 39, + "task_loss": 0.27272433042526245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7013824687706396, + "compression_loss": 0.0, + "distillation_loss": 0.5879485011100769, + "epoch": 0.04, + "learning_rate": 4.999689149633402e-05, + "loss": 0.5481, + "step": 40, + "task_loss": 0.18962328135967255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7014177481160824, + "compression_loss": 0.0, + "distillation_loss": 0.3988691568374634, + "epoch": 0.04, + "learning_rate": 4.999672120219323e-05, + "loss": 0.3689, + "step": 41, + "task_loss": 0.09930090606212616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7014530190466545, + "compression_loss": 0.0, + "distillation_loss": 1.1346763372421265, + "epoch": 0.04, + "learning_rate": 4.999654636727764e-05, + "loss": 1.0762, + "step": 42, + "task_loss": 0.5502893924713135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7014882815633595, + "compression_loss": 0.0, + "distillation_loss": 1.0310869216918945, + "epoch": 0.04, + "learning_rate": 4.9996366991619034e-05, + "loss": 0.9707, + "step": 43, + "task_loss": 0.42731019854545593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7015235356672014, + "compression_loss": 0.0, + "distillation_loss": 0.7622416615486145, + "epoch": 0.04, + "learning_rate": 4.999618307524997e-05, + "loss": 0.7244, + "step": 44, + "task_loss": 0.38374945521354675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7015587813591835, + "compression_loss": 0.0, + "distillation_loss": 1.0716254711151123, + "epoch": 0.04, + "learning_rate": 4.999599461820387e-05, + "loss": 1.0246, + "step": 45, + "task_loss": 0.6018234491348267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7015940186403098, + "compression_loss": 0.0, + "distillation_loss": 0.7515419721603394, + "epoch": 0.04, + "learning_rate": 4.999580162051497e-05, + "loss": 0.7087, + "step": 46, + "task_loss": 0.3226703405380249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7016292475115837, + "compression_loss": 0.0, + "distillation_loss": 1.1621253490447998, + "epoch": 0.04, + "learning_rate": 4.9995604082218314e-05, + "loss": 1.1132, + "step": 47, + "task_loss": 0.6729358434677124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7016644679740092, + "compression_loss": 0.0, + "distillation_loss": 0.5425492525100708, + "epoch": 0.05, + "learning_rate": 4.99954020033498e-05, + "loss": 0.5159, + "step": 48, + "task_loss": 0.2755749225616455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7016996800285896, + "compression_loss": 0.0, + "distillation_loss": 0.7733719348907471, + "epoch": 0.05, + "learning_rate": 4.9995195383946135e-05, + "loss": 0.7346, + "step": 49, + "task_loss": 0.38580086827278137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.701734883676329, + "compression_loss": 0.0, + "distillation_loss": 0.7129077911376953, + "epoch": 0.05, + "learning_rate": 4.999498422404485e-05, + "loss": 0.6741, + "step": 50, + "task_loss": 0.3249415457248688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7017700789182307, + "compression_loss": 0.0, + "distillation_loss": 0.8956947922706604, + "epoch": 0.05, + "learning_rate": 4.999476852368431e-05, + "loss": 0.8533, + "step": 51, + "task_loss": 0.4715935289859772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7018052657552987, + "compression_loss": 0.0, + "distillation_loss": 1.1765224933624268, + "epoch": 0.05, + "learning_rate": 4.999454828290369e-05, + "loss": 1.1277, + "step": 52, + "task_loss": 0.6885514855384827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7018404441885364, + "compression_loss": 0.0, + "distillation_loss": 0.7294641137123108, + "epoch": 0.05, + "learning_rate": 4.999432350174299e-05, + "loss": 0.6924, + "step": 53, + "task_loss": 0.3584412932395935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7018756142189477, + "compression_loss": 0.0, + "distillation_loss": 0.4732089936733246, + "epoch": 0.05, + "learning_rate": 4.9994094180243055e-05, + "loss": 0.4407, + "step": 54, + "task_loss": 0.14808303117752075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7019107758475361, + "compression_loss": 0.0, + "distillation_loss": 0.7207993268966675, + "epoch": 0.05, + "learning_rate": 4.999386031844554e-05, + "loss": 0.6882, + "step": 55, + "task_loss": 0.39489054679870605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7019459290753055, + "compression_loss": 0.0, + "distillation_loss": 0.7156393527984619, + "epoch": 0.05, + "learning_rate": 4.999362191639293e-05, + "loss": 0.6781, + "step": 56, + "task_loss": 0.3404168486595154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7019810739032595, + "compression_loss": 0.0, + "distillation_loss": 0.528157651424408, + "epoch": 0.05, + "learning_rate": 4.999337897412852e-05, + "loss": 0.4956, + "step": 57, + "task_loss": 0.20256038010120392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7020162103324016, + "compression_loss": 0.0, + "distillation_loss": 0.7788621187210083, + "epoch": 0.06, + "learning_rate": 4.999313149169645e-05, + "loss": 0.7403, + "step": 58, + "task_loss": 0.3931070566177368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7020513383637357, + "compression_loss": 0.0, + "distillation_loss": 1.1968752145767212, + "epoch": 0.06, + "learning_rate": 4.999287946914169e-05, + "loss": 1.1309, + "step": 59, + "task_loss": 0.5368590950965881 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7020864579982654, + "compression_loss": 0.0, + "distillation_loss": 0.7765418887138367, + "epoch": 0.06, + "learning_rate": 4.999262290651e-05, + "loss": 0.7296, + "step": 60, + "task_loss": 0.3072131276130676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7021215692369944, + "compression_loss": 0.0, + "distillation_loss": 0.9318764209747314, + "epoch": 0.06, + "learning_rate": 4.9992361803847995e-05, + "loss": 0.8886, + "step": 61, + "task_loss": 0.49911895394325256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7021566720809264, + "compression_loss": 0.0, + "distillation_loss": 0.6415928602218628, + "epoch": 0.06, + "learning_rate": 4.99920961612031e-05, + "loss": 0.6061, + "step": 62, + "task_loss": 0.2865428924560547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.702191766531065, + "compression_loss": 0.0, + "distillation_loss": 0.8092690706253052, + "epoch": 0.06, + "learning_rate": 4.9991825978623574e-05, + "loss": 0.7653, + "step": 63, + "task_loss": 0.3699354827404022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7022268525884139, + "compression_loss": 0.0, + "distillation_loss": 0.618144690990448, + "epoch": 0.06, + "learning_rate": 4.9991551256158495e-05, + "loss": 0.5829, + "step": 64, + "task_loss": 0.26606857776641846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7022619302539769, + "compression_loss": 0.0, + "distillation_loss": 0.8759365677833557, + "epoch": 0.06, + "learning_rate": 4.999127199385778e-05, + "loss": 0.8231, + "step": 65, + "task_loss": 0.3478449881076813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7022969995287576, + "compression_loss": 0.0, + "distillation_loss": 0.4816649556159973, + "epoch": 0.06, + "learning_rate": 4.999098819177214e-05, + "loss": 0.4514, + "step": 66, + "task_loss": 0.17922161519527435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7023320604137597, + "compression_loss": 0.0, + "distillation_loss": 0.5684869289398193, + "epoch": 0.06, + "learning_rate": 4.999069984995314e-05, + "loss": 0.535, + "step": 67, + "task_loss": 0.23375429213047028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7023671129099868, + "compression_loss": 0.0, + "distillation_loss": 0.7121763825416565, + "epoch": 0.06, + "learning_rate": 4.999040696845315e-05, + "loss": 0.6668, + "step": 68, + "task_loss": 0.25801488757133484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7024021570184427, + "compression_loss": 0.0, + "distillation_loss": 0.5653649568557739, + "epoch": 0.07, + "learning_rate": 4.999010954732538e-05, + "loss": 0.5359, + "step": 69, + "task_loss": 0.27088284492492676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7024371927401311, + "compression_loss": 0.0, + "distillation_loss": 0.8046922087669373, + "epoch": 0.07, + "learning_rate": 4.998980758662386e-05, + "loss": 0.7569, + "step": 70, + "task_loss": 0.3263823986053467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7024722200760555, + "compression_loss": 0.0, + "distillation_loss": 0.6771596670150757, + "epoch": 0.07, + "learning_rate": 4.998950108640345e-05, + "loss": 0.6458, + "step": 71, + "task_loss": 0.3636930286884308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7025072390272197, + "compression_loss": 0.0, + "distillation_loss": 0.41592103242874146, + "epoch": 0.07, + "learning_rate": 4.99891900467198e-05, + "loss": 0.3859, + "step": 72, + "task_loss": 0.11569308489561081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7025422495946274, + "compression_loss": 0.0, + "distillation_loss": 0.5050232410430908, + "epoch": 0.07, + "learning_rate": 4.9988874467629435e-05, + "loss": 0.4809, + "step": 73, + "task_loss": 0.26406604051589966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7025772517792822, + "compression_loss": 0.0, + "distillation_loss": 0.7662781476974487, + "epoch": 0.07, + "learning_rate": 4.998855434918968e-05, + "loss": 0.7258, + "step": 74, + "task_loss": 0.36163684725761414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7026122455821879, + "compression_loss": 0.0, + "distillation_loss": 0.5249364376068115, + "epoch": 0.07, + "learning_rate": 4.998822969145868e-05, + "loss": 0.4962, + "step": 75, + "task_loss": 0.23764313757419586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7026472310043481, + "compression_loss": 0.0, + "distillation_loss": 0.755010187625885, + "epoch": 0.07, + "learning_rate": 4.99879004944954e-05, + "loss": 0.7195, + "step": 76, + "task_loss": 0.4000910520553589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7026822080467666, + "compression_loss": 0.0, + "distillation_loss": 0.5521838068962097, + "epoch": 0.07, + "learning_rate": 4.998756675835966e-05, + "loss": 0.5156, + "step": 77, + "task_loss": 0.1858932077884674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7027171767104469, + "compression_loss": 0.0, + "distillation_loss": 0.7275650501251221, + "epoch": 0.07, + "learning_rate": 4.9987228483112083e-05, + "loss": 0.694, + "step": 78, + "task_loss": 0.39170515537261963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7027521369963928, + "compression_loss": 0.0, + "distillation_loss": 0.6621497869491577, + "epoch": 0.08, + "learning_rate": 4.998688566881411e-05, + "loss": 0.6312, + "step": 79, + "task_loss": 0.35260745882987976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7027870889056079, + "compression_loss": 0.0, + "distillation_loss": 0.8724344968795776, + "epoch": 0.08, + "learning_rate": 4.998653831552801e-05, + "loss": 0.8453, + "step": 80, + "task_loss": 0.6009440422058105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.702822032439096, + "compression_loss": 0.0, + "distillation_loss": 0.6811240911483765, + "epoch": 0.08, + "learning_rate": 4.998618642331689e-05, + "loss": 0.6469, + "step": 81, + "task_loss": 0.33839285373687744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7028569675978606, + "compression_loss": 0.0, + "distillation_loss": 0.5384796857833862, + "epoch": 0.08, + "learning_rate": 4.9985829992244675e-05, + "loss": 0.508, + "step": 82, + "task_loss": 0.2334153652191162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7028918943829057, + "compression_loss": 0.0, + "distillation_loss": 0.8584867715835571, + "epoch": 0.08, + "learning_rate": 4.998546902237611e-05, + "loss": 0.811, + "step": 83, + "task_loss": 0.38361459970474243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7029268127952346, + "compression_loss": 0.0, + "distillation_loss": 0.419251024723053, + "epoch": 0.08, + "learning_rate": 4.9985103513776764e-05, + "loss": 0.4111, + "step": 84, + "task_loss": 0.3377459645271301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7029617228358512, + "compression_loss": 0.0, + "distillation_loss": 0.8462650179862976, + "epoch": 0.08, + "learning_rate": 4.998473346651303e-05, + "loss": 0.8097, + "step": 85, + "task_loss": 0.48073211312294006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7029966245057592, + "compression_loss": 0.0, + "distillation_loss": 1.191063404083252, + "epoch": 0.08, + "learning_rate": 4.9984358880652146e-05, + "loss": 1.1373, + "step": 86, + "task_loss": 0.653830349445343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7030315178059622, + "compression_loss": 0.0, + "distillation_loss": 0.8207377195358276, + "epoch": 0.08, + "learning_rate": 4.9983979756262136e-05, + "loss": 0.7749, + "step": 87, + "task_loss": 0.3625227212905884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7030664027374639, + "compression_loss": 0.0, + "distillation_loss": 0.5961652994155884, + "epoch": 0.08, + "learning_rate": 4.998359609341188e-05, + "loss": 0.5955, + "step": 88, + "task_loss": 0.589705228805542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.703101279301268, + "compression_loss": 0.0, + "distillation_loss": 0.7842130661010742, + "epoch": 0.08, + "learning_rate": 4.9983207892171074e-05, + "loss": 0.7404, + "step": 89, + "task_loss": 0.3462582230567932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7031361474983782, + "compression_loss": 0.0, + "distillation_loss": 0.8764528036117554, + "epoch": 0.09, + "learning_rate": 4.998281515261023e-05, + "loss": 0.83, + "step": 90, + "task_loss": 0.41157200932502747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7031710073297981, + "compression_loss": 0.0, + "distillation_loss": 1.0387656688690186, + "epoch": 0.09, + "learning_rate": 4.9982417874800704e-05, + "loss": 0.9859, + "step": 91, + "task_loss": 0.5104459524154663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7032058587965315, + "compression_loss": 0.0, + "distillation_loss": 0.4702117443084717, + "epoch": 0.09, + "learning_rate": 4.998201605881465e-05, + "loss": 0.4451, + "step": 92, + "task_loss": 0.2186840921640396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.703240701899582, + "compression_loss": 0.0, + "distillation_loss": 0.5508553981781006, + "epoch": 0.09, + "learning_rate": 4.9981609704725057e-05, + "loss": 0.516, + "step": 93, + "task_loss": 0.20186278223991394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7032755366399532, + "compression_loss": 0.0, + "distillation_loss": 0.4743342697620392, + "epoch": 0.09, + "learning_rate": 4.998119881260576e-05, + "loss": 0.4436, + "step": 94, + "task_loss": 0.16690446436405182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.703310363018649, + "compression_loss": 0.0, + "distillation_loss": 0.7569644451141357, + "epoch": 0.09, + "learning_rate": 4.9980783382531376e-05, + "loss": 0.7158, + "step": 95, + "task_loss": 0.3451739251613617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7033451810366729, + "compression_loss": 0.0, + "distillation_loss": 0.6965117454528809, + "epoch": 0.09, + "learning_rate": 4.998036341457739e-05, + "loss": 0.659, + "step": 96, + "task_loss": 0.32098302245140076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7033799906950287, + "compression_loss": 0.0, + "distillation_loss": 0.8976222276687622, + "epoch": 0.09, + "learning_rate": 4.997993890882008e-05, + "loss": 0.8545, + "step": 97, + "task_loss": 0.46677258610725403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.70341479199472, + "compression_loss": 0.0, + "distillation_loss": 1.0592153072357178, + "epoch": 0.09, + "learning_rate": 4.997950986533656e-05, + "loss": 1.0024, + "step": 98, + "task_loss": 0.4912152886390686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7034495849367505, + "compression_loss": 0.0, + "distillation_loss": 0.7887797355651855, + "epoch": 0.09, + "learning_rate": 4.997907628420477e-05, + "loss": 0.7517, + "step": 99, + "task_loss": 0.4175463318824768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7034843695221239, + "compression_loss": 0.0, + "distillation_loss": 0.7304688692092896, + "epoch": 0.09, + "learning_rate": 4.9978638165503475e-05, + "loss": 0.6888, + "step": 100, + "task_loss": 0.3141331076622009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7035191457518439, + "compression_loss": 0.0, + "distillation_loss": 0.7318391799926758, + "epoch": 0.1, + "learning_rate": 4.9978195509312266e-05, + "loss": 0.7035, + "step": 101, + "task_loss": 0.4489053189754486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7035539136269141, + "compression_loss": 0.0, + "distillation_loss": 0.5332415699958801, + "epoch": 0.1, + "learning_rate": 4.997774831571154e-05, + "loss": 0.5093, + "step": 102, + "task_loss": 0.2942197620868683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7035886731483383, + "compression_loss": 0.0, + "distillation_loss": 0.6230442523956299, + "epoch": 0.1, + "learning_rate": 4.9977296584782544e-05, + "loss": 0.5832, + "step": 103, + "task_loss": 0.2250797152519226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7036234243171201, + "compression_loss": 0.0, + "distillation_loss": 0.28762802481651306, + "epoch": 0.1, + "learning_rate": 4.997684031660732e-05, + "loss": 0.2797, + "step": 104, + "task_loss": 0.20829369127750397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7036581671342632, + "compression_loss": 0.0, + "distillation_loss": 0.7059702277183533, + "epoch": 0.1, + "learning_rate": 4.997637951126877e-05, + "loss": 0.6714, + "step": 105, + "task_loss": 0.36017781496047974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7036929016007714, + "compression_loss": 0.0, + "distillation_loss": 0.9591817855834961, + "epoch": 0.1, + "learning_rate": 4.997591416885059e-05, + "loss": 0.9062, + "step": 106, + "task_loss": 0.4291348457336426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7037276277176481, + "compression_loss": 0.0, + "distillation_loss": 0.5214189887046814, + "epoch": 0.1, + "learning_rate": 4.997544428943732e-05, + "loss": 0.4899, + "step": 107, + "task_loss": 0.20646995306015015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7037623454858972, + "compression_loss": 0.0, + "distillation_loss": 0.7594773769378662, + "epoch": 0.1, + "learning_rate": 4.997496987311431e-05, + "loss": 0.7246, + "step": 108, + "task_loss": 0.41065266728401184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7037970549065223, + "compression_loss": 0.0, + "distillation_loss": 1.0592186450958252, + "epoch": 0.1, + "learning_rate": 4.997449091996774e-05, + "loss": 1.0102, + "step": 109, + "task_loss": 0.5689947605133057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7038317559805272, + "compression_loss": 0.0, + "distillation_loss": 0.717729389667511, + "epoch": 0.1, + "learning_rate": 4.9974007430084617e-05, + "loss": 0.693, + "step": 110, + "task_loss": 0.47034698724746704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7038664487089155, + "compression_loss": 0.0, + "distillation_loss": 0.5633364915847778, + "epoch": 0.11, + "learning_rate": 4.997351940355277e-05, + "loss": 0.5359, + "step": 111, + "task_loss": 0.2890656292438507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7039011330926908, + "compression_loss": 0.0, + "distillation_loss": 0.8346014618873596, + "epoch": 0.11, + "learning_rate": 4.997302684046085e-05, + "loss": 0.7913, + "step": 112, + "task_loss": 0.4014682173728943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7039358091328568, + "compression_loss": 0.0, + "distillation_loss": 0.6383477449417114, + "epoch": 0.11, + "learning_rate": 4.997252974089833e-05, + "loss": 0.6, + "step": 113, + "task_loss": 0.25466716289520264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7039704768304174, + "compression_loss": 0.0, + "distillation_loss": 1.142728328704834, + "epoch": 0.11, + "learning_rate": 4.997202810495551e-05, + "loss": 1.0939, + "step": 114, + "task_loss": 0.6547843813896179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7040051361863761, + "compression_loss": 0.0, + "distillation_loss": 0.4164957106113434, + "epoch": 0.11, + "learning_rate": 4.997152193272353e-05, + "loss": 0.391, + "step": 115, + "task_loss": 0.16178376972675323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7040397872017365, + "compression_loss": 0.0, + "distillation_loss": 0.24880273640155792, + "epoch": 0.11, + "learning_rate": 4.9971011224294314e-05, + "loss": 0.2379, + "step": 116, + "task_loss": 0.14008797705173492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7040744298775025, + "compression_loss": 0.0, + "distillation_loss": 0.9239636659622192, + "epoch": 0.11, + "learning_rate": 4.997049597976066e-05, + "loss": 0.884, + "step": 117, + "task_loss": 0.5244091749191284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7041090642146776, + "compression_loss": 0.0, + "distillation_loss": 0.43864983320236206, + "epoch": 0.11, + "learning_rate": 4.9969976199216144e-05, + "loss": 0.4116, + "step": 118, + "task_loss": 0.16765302419662476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7041436902142656, + "compression_loss": 0.0, + "distillation_loss": 0.5404030084609985, + "epoch": 0.11, + "learning_rate": 4.9969451882755196e-05, + "loss": 0.5152, + "step": 119, + "task_loss": 0.28787752985954285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7041783078772701, + "compression_loss": 0.0, + "distillation_loss": 0.4323447644710541, + "epoch": 0.11, + "learning_rate": 4.996892303047306e-05, + "loss": 0.4133, + "step": 120, + "task_loss": 0.24157865345478058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7042129172046948, + "compression_loss": 0.0, + "distillation_loss": 0.6243703365325928, + "epoch": 0.11, + "learning_rate": 4.996838964246581e-05, + "loss": 0.5941, + "step": 121, + "task_loss": 0.3215750753879547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7042475181975434, + "compression_loss": 0.0, + "distillation_loss": 0.7456376552581787, + "epoch": 0.12, + "learning_rate": 4.996785171883032e-05, + "loss": 0.7073, + "step": 122, + "task_loss": 0.3622223436832428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7042821108568197, + "compression_loss": 0.0, + "distillation_loss": 0.7294185161590576, + "epoch": 0.12, + "learning_rate": 4.996730925966433e-05, + "loss": 0.694, + "step": 123, + "task_loss": 0.3748391270637512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7043166951835271, + "compression_loss": 0.0, + "distillation_loss": 1.1894690990447998, + "epoch": 0.12, + "learning_rate": 4.996676226506636e-05, + "loss": 1.1356, + "step": 124, + "task_loss": 0.6504833102226257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7043512711786696, + "compression_loss": 0.0, + "distillation_loss": 0.6352778673171997, + "epoch": 0.12, + "learning_rate": 4.9966210735135785e-05, + "loss": 0.6021, + "step": 125, + "task_loss": 0.3033022880554199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7043858388432506, + "compression_loss": 0.0, + "distillation_loss": 0.4930354058742523, + "epoch": 0.12, + "learning_rate": 4.9965654669972794e-05, + "loss": 0.4721, + "step": 126, + "task_loss": 0.2833815813064575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.704420398178274, + "compression_loss": 0.0, + "distillation_loss": 1.0228924751281738, + "epoch": 0.12, + "learning_rate": 4.99650940696784e-05, + "loss": 0.9778, + "step": 127, + "task_loss": 0.571823000907898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7044549491847434, + "compression_loss": 0.0, + "distillation_loss": 0.5655504465103149, + "epoch": 0.12, + "learning_rate": 4.996452893435442e-05, + "loss": 0.5301, + "step": 128, + "task_loss": 0.21055805683135986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7044894918636624, + "compression_loss": 0.0, + "distillation_loss": 0.6770893335342407, + "epoch": 0.12, + "learning_rate": 4.9963959264103544e-05, + "loss": 0.6397, + "step": 129, + "task_loss": 0.30343693494796753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7045240262160347, + "compression_loss": 0.0, + "distillation_loss": 0.4913747012615204, + "epoch": 0.12, + "learning_rate": 4.996338505902924e-05, + "loss": 0.4605, + "step": 130, + "task_loss": 0.1823093295097351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7045585522428641, + "compression_loss": 0.0, + "distillation_loss": 0.5457890033721924, + "epoch": 0.12, + "learning_rate": 4.996280631923581e-05, + "loss": 0.5186, + "step": 131, + "task_loss": 0.2735254168510437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7045930699451544, + "compression_loss": 0.0, + "distillation_loss": 0.2785548269748688, + "epoch": 0.13, + "learning_rate": 4.9962223044828396e-05, + "loss": 0.2587, + "step": 132, + "task_loss": 0.08010871708393097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7046275793239088, + "compression_loss": 0.0, + "distillation_loss": 0.49103331565856934, + "epoch": 0.13, + "learning_rate": 4.9961635235912935e-05, + "loss": 0.4746, + "step": 133, + "task_loss": 0.32637351751327515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7046620803801315, + "compression_loss": 0.0, + "distillation_loss": 0.3778892755508423, + "epoch": 0.13, + "learning_rate": 4.9961042892596225e-05, + "loss": 0.3622, + "step": 134, + "task_loss": 0.2211739420890808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7046965731148259, + "compression_loss": 0.0, + "distillation_loss": 0.4187111258506775, + "epoch": 0.13, + "learning_rate": 4.996044601498586e-05, + "loss": 0.3963, + "step": 135, + "task_loss": 0.1942349374294281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7047310575289957, + "compression_loss": 0.0, + "distillation_loss": 0.5608755350112915, + "epoch": 0.13, + "learning_rate": 4.995984460319026e-05, + "loss": 0.5274, + "step": 136, + "task_loss": 0.22652751207351685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7047655336236447, + "compression_loss": 0.0, + "distillation_loss": 0.642856776714325, + "epoch": 0.13, + "learning_rate": 4.995923865731869e-05, + "loss": 0.6072, + "step": 137, + "task_loss": 0.28608477115631104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7048000013997765, + "compression_loss": 0.0, + "distillation_loss": 0.6759918928146362, + "epoch": 0.13, + "learning_rate": 4.9958628177481195e-05, + "loss": 0.6456, + "step": 138, + "task_loss": 0.37211018800735474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7048344608583947, + "compression_loss": 0.0, + "distillation_loss": 0.5849887728691101, + "epoch": 0.13, + "learning_rate": 4.99580131637887e-05, + "loss": 0.5497, + "step": 139, + "task_loss": 0.23251289129257202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7048689120005032, + "compression_loss": 0.0, + "distillation_loss": 0.5638033151626587, + "epoch": 0.13, + "learning_rate": 4.995739361635292e-05, + "loss": 0.5441, + "step": 140, + "task_loss": 0.367082804441452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7049033548271055, + "compression_loss": 0.0, + "distillation_loss": 0.4791497588157654, + "epoch": 0.13, + "learning_rate": 4.9956769535286385e-05, + "loss": 0.4515, + "step": 141, + "task_loss": 0.20264172554016113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7049377893392055, + "compression_loss": 0.0, + "distillation_loss": 0.45656701922416687, + "epoch": 0.13, + "learning_rate": 4.9956140920702476e-05, + "loss": 0.4341, + "step": 142, + "task_loss": 0.23189952969551086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7049722155378065, + "compression_loss": 0.0, + "distillation_loss": 0.5628130435943604, + "epoch": 0.14, + "learning_rate": 4.995550777271538e-05, + "loss": 0.5244, + "step": 143, + "task_loss": 0.1786140650510788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7050066334239125, + "compression_loss": 0.0, + "distillation_loss": 0.8593398332595825, + "epoch": 0.14, + "learning_rate": 4.995487009144011e-05, + "loss": 0.817, + "step": 144, + "task_loss": 0.43626001477241516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7050410429985271, + "compression_loss": 0.0, + "distillation_loss": 0.6192554235458374, + "epoch": 0.14, + "learning_rate": 4.99542278769925e-05, + "loss": 0.5859, + "step": 145, + "task_loss": 0.2856113016605377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7050754442626539, + "compression_loss": 0.0, + "distillation_loss": 0.46989959478378296, + "epoch": 0.14, + "learning_rate": 4.995358112948921e-05, + "loss": 0.4416, + "step": 146, + "task_loss": 0.18703171610832214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7051098372172967, + "compression_loss": 0.0, + "distillation_loss": 0.7966834902763367, + "epoch": 0.14, + "learning_rate": 4.9952929849047734e-05, + "loss": 0.754, + "step": 147, + "task_loss": 0.3696812093257904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7051442218634592, + "compression_loss": 0.0, + "distillation_loss": 0.8801038265228271, + "epoch": 0.14, + "learning_rate": 4.9952274035786385e-05, + "loss": 0.8282, + "step": 148, + "task_loss": 0.3614765405654907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7051785982021449, + "compression_loss": 0.0, + "distillation_loss": 0.8862295150756836, + "epoch": 0.14, + "learning_rate": 4.9951613689824276e-05, + "loss": 0.8481, + "step": 149, + "task_loss": 0.5047279596328735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7052129662343577, + "compression_loss": 0.0, + "distillation_loss": 0.7615218162536621, + "epoch": 0.14, + "learning_rate": 4.995094881128138e-05, + "loss": 0.7294, + "step": 150, + "task_loss": 0.4406706392765045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7052473259611011, + "compression_loss": 0.0, + "distillation_loss": 0.5523242950439453, + "epoch": 0.14, + "learning_rate": 4.995027940027846e-05, + "loss": 0.5313, + "step": 151, + "task_loss": 0.3420305848121643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7052816773833789, + "compression_loss": 0.0, + "distillation_loss": 0.43813109397888184, + "epoch": 0.14, + "learning_rate": 4.9949605456937135e-05, + "loss": 0.4062, + "step": 152, + "task_loss": 0.11931411176919937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7053160205021947, + "compression_loss": 0.0, + "distillation_loss": 0.6433530449867249, + "epoch": 0.15, + "learning_rate": 4.994892698137981e-05, + "loss": 0.608, + "step": 153, + "task_loss": 0.2893901765346527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7053503553185523, + "compression_loss": 0.0, + "distillation_loss": 0.6064528226852417, + "epoch": 0.15, + "learning_rate": 4.9948243973729745e-05, + "loss": 0.5714, + "step": 154, + "task_loss": 0.25594377517700195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7053846818334553, + "compression_loss": 0.0, + "distillation_loss": 0.7984007000923157, + "epoch": 0.15, + "learning_rate": 4.994755643411101e-05, + "loss": 0.7624, + "step": 155, + "task_loss": 0.43833673000335693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7054190000479073, + "compression_loss": 0.0, + "distillation_loss": 0.5333908796310425, + "epoch": 0.15, + "learning_rate": 4.9946864362648506e-05, + "loss": 0.5041, + "step": 156, + "task_loss": 0.240193709731102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7054533099629121, + "compression_loss": 0.0, + "distillation_loss": 0.5313940048217773, + "epoch": 0.15, + "learning_rate": 4.994616775946794e-05, + "loss": 0.5092, + "step": 157, + "task_loss": 0.3092755973339081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7054876115794735, + "compression_loss": 0.0, + "distillation_loss": 0.6694386005401611, + "epoch": 0.15, + "learning_rate": 4.994546662469586e-05, + "loss": 0.6325, + "step": 158, + "task_loss": 0.3002847731113434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7055219048985948, + "compression_loss": 0.0, + "distillation_loss": 0.43738991022109985, + "epoch": 0.15, + "learning_rate": 4.9944760958459624e-05, + "loss": 0.4088, + "step": 159, + "task_loss": 0.15195739269256592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7055561899212801, + "compression_loss": 0.0, + "distillation_loss": 0.6324198842048645, + "epoch": 0.15, + "learning_rate": 4.994405076088743e-05, + "loss": 0.6241, + "step": 160, + "task_loss": 0.5489103198051453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7055904666485329, + "compression_loss": 0.0, + "distillation_loss": 0.4191284775733948, + "epoch": 0.15, + "learning_rate": 4.994333603210829e-05, + "loss": 0.4006, + "step": 161, + "task_loss": 0.23342673480510712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7056247350813567, + "compression_loss": 0.0, + "distillation_loss": 0.5982824563980103, + "epoch": 0.15, + "learning_rate": 4.9942616772252016e-05, + "loss": 0.5746, + "step": 162, + "task_loss": 0.3616164028644562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7056589952207556, + "compression_loss": 0.0, + "distillation_loss": 0.279816210269928, + "epoch": 0.15, + "learning_rate": 4.994189298144929e-05, + "loss": 0.2647, + "step": 163, + "task_loss": 0.12842896580696106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7056932470677328, + "compression_loss": 0.0, + "distillation_loss": 0.26615631580352783, + "epoch": 0.16, + "learning_rate": 4.994116465983158e-05, + "loss": 0.2481, + "step": 164, + "task_loss": 0.08531204611063004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7057274906232924, + "compression_loss": 0.0, + "distillation_loss": 0.7699984908103943, + "epoch": 0.16, + "learning_rate": 4.99404318075312e-05, + "loss": 0.7333, + "step": 165, + "task_loss": 0.4031785726547241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7057617258884379, + "compression_loss": 0.0, + "distillation_loss": 0.32784250378608704, + "epoch": 0.16, + "learning_rate": 4.993969442468125e-05, + "loss": 0.3131, + "step": 166, + "task_loss": 0.1799994260072708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7057959528641731, + "compression_loss": 0.0, + "distillation_loss": 0.5546020269393921, + "epoch": 0.16, + "learning_rate": 4.993895251141571e-05, + "loss": 0.5365, + "step": 167, + "task_loss": 0.37407755851745605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7058301715515014, + "compression_loss": 0.0, + "distillation_loss": 0.24405278265476227, + "epoch": 0.16, + "learning_rate": 4.9938206067869334e-05, + "loss": 0.2312, + "step": 168, + "task_loss": 0.11586499214172363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7058643819514266, + "compression_loss": 0.0, + "distillation_loss": 0.897857666015625, + "epoch": 0.16, + "learning_rate": 4.993745509417772e-05, + "loss": 0.8501, + "step": 169, + "task_loss": 0.42022186517715454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7058985840649525, + "compression_loss": 0.0, + "distillation_loss": 0.62211012840271, + "epoch": 0.16, + "learning_rate": 4.9936699590477296e-05, + "loss": 0.5941, + "step": 170, + "task_loss": 0.3424464166164398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7059327778930828, + "compression_loss": 0.0, + "distillation_loss": 0.4366634488105774, + "epoch": 0.16, + "learning_rate": 4.9935939556905295e-05, + "loss": 0.4205, + "step": 171, + "task_loss": 0.27515465021133423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7059669634368212, + "compression_loss": 0.0, + "distillation_loss": 0.4915463626384735, + "epoch": 0.16, + "learning_rate": 4.993517499359978e-05, + "loss": 0.4641, + "step": 172, + "task_loss": 0.2167641520500183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7060011406971711, + "compression_loss": 0.0, + "distillation_loss": 0.5092770457267761, + "epoch": 0.16, + "learning_rate": 4.993440590069963e-05, + "loss": 0.4841, + "step": 173, + "task_loss": 0.257138729095459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7060353096751364, + "compression_loss": 0.0, + "distillation_loss": 0.5051209926605225, + "epoch": 0.17, + "learning_rate": 4.993363227834457e-05, + "loss": 0.4766, + "step": 174, + "task_loss": 0.22033719718456268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7060694703717209, + "compression_loss": 0.0, + "distillation_loss": 0.6676339507102966, + "epoch": 0.17, + "learning_rate": 4.9932854126675124e-05, + "loss": 0.6391, + "step": 175, + "task_loss": 0.3822685778141022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7061036227879279, + "compression_loss": 0.0, + "distillation_loss": 0.2579382061958313, + "epoch": 0.17, + "learning_rate": 4.993207144583264e-05, + "loss": 0.2493, + "step": 176, + "task_loss": 0.17196613550186157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7061377669247615, + "compression_loss": 0.0, + "distillation_loss": 0.39016398787498474, + "epoch": 0.17, + "learning_rate": 4.993128423595931e-05, + "loss": 0.3674, + "step": 177, + "task_loss": 0.16221883893013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7061719027832251, + "compression_loss": 0.0, + "distillation_loss": 0.5886770486831665, + "epoch": 0.17, + "learning_rate": 4.9930492497198125e-05, + "loss": 0.5569, + "step": 178, + "task_loss": 0.2707720994949341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7062060303643225, + "compression_loss": 0.0, + "distillation_loss": 0.31564319133758545, + "epoch": 0.17, + "learning_rate": 4.992969622969292e-05, + "loss": 0.3039, + "step": 179, + "task_loss": 0.19809630513191223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7062401496690573, + "compression_loss": 0.0, + "distillation_loss": 0.35320743918418884, + "epoch": 0.17, + "learning_rate": 4.992889543358832e-05, + "loss": 0.3371, + "step": 180, + "task_loss": 0.19164326786994934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7062742606984334, + "compression_loss": 0.0, + "distillation_loss": 0.8013370633125305, + "epoch": 0.17, + "learning_rate": 4.9928090109029817e-05, + "loss": 0.7688, + "step": 181, + "task_loss": 0.4764384627342224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7063083634534542, + "compression_loss": 0.0, + "distillation_loss": 1.4201560020446777, + "epoch": 0.17, + "learning_rate": 4.9927280256163686e-05, + "loss": 1.3439, + "step": 182, + "task_loss": 0.6578447222709656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7063424579351235, + "compression_loss": 0.0, + "distillation_loss": 0.35098546743392944, + "epoch": 0.17, + "learning_rate": 4.992646587513705e-05, + "loss": 0.3229, + "step": 183, + "task_loss": 0.07008456438779831 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7063765441444451, + "compression_loss": 0.0, + "distillation_loss": 0.6404779553413391, + "epoch": 0.17, + "learning_rate": 4.9925646966097835e-05, + "loss": 0.6098, + "step": 184, + "task_loss": 0.3341161012649536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7064106220824224, + "compression_loss": 0.0, + "distillation_loss": 0.4935486912727356, + "epoch": 0.18, + "learning_rate": 4.99248235291948e-05, + "loss": 0.4679, + "step": 185, + "task_loss": 0.2367597073316574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7064446917500594, + "compression_loss": 0.0, + "distillation_loss": 0.3535561263561249, + "epoch": 0.18, + "learning_rate": 4.9923995564577544e-05, + "loss": 0.3314, + "step": 186, + "task_loss": 0.13228961825370789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7064787531483596, + "compression_loss": 0.0, + "distillation_loss": 0.6032556295394897, + "epoch": 0.18, + "learning_rate": 4.992316307239645e-05, + "loss": 0.572, + "step": 187, + "task_loss": 0.2907094657421112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7065128062783267, + "compression_loss": 0.0, + "distillation_loss": 1.0592761039733887, + "epoch": 0.18, + "learning_rate": 4.992232605280276e-05, + "loss": 1.0138, + "step": 188, + "task_loss": 0.6046409606933594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7065468511409644, + "compression_loss": 0.0, + "distillation_loss": 0.32051318883895874, + "epoch": 0.18, + "learning_rate": 4.992148450594851e-05, + "loss": 0.3152, + "step": 189, + "task_loss": 0.26700055599212646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7065808877372763, + "compression_loss": 0.0, + "distillation_loss": 0.4926970899105072, + "epoch": 0.18, + "learning_rate": 4.9920638431986574e-05, + "loss": 0.4646, + "step": 190, + "task_loss": 0.21135534346103668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7066149160682663, + "compression_loss": 0.0, + "distillation_loss": 0.7110521793365479, + "epoch": 0.18, + "learning_rate": 4.991978783107065e-05, + "loss": 0.6783, + "step": 191, + "task_loss": 0.38350385427474976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.706648936134938, + "compression_loss": 0.0, + "distillation_loss": 0.4119304120540619, + "epoch": 0.18, + "learning_rate": 4.9918932703355256e-05, + "loss": 0.3859, + "step": 192, + "task_loss": 0.15174490213394165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7066829479382948, + "compression_loss": 0.0, + "distillation_loss": 0.5923248529434204, + "epoch": 0.18, + "learning_rate": 4.991807304899572e-05, + "loss": 0.5592, + "step": 193, + "task_loss": 0.26117557287216187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7067169514793408, + "compression_loss": 0.0, + "distillation_loss": 0.41325411200523376, + "epoch": 0.18, + "learning_rate": 4.991720886814821e-05, + "loss": 0.3904, + "step": 194, + "task_loss": 0.18504031002521515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7067509467590795, + "compression_loss": 0.0, + "distillation_loss": 0.4917716085910797, + "epoch": 0.19, + "learning_rate": 4.99163401609697e-05, + "loss": 0.4618, + "step": 195, + "task_loss": 0.1917870044708252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7067849337785144, + "compression_loss": 0.0, + "distillation_loss": 0.48008596897125244, + "epoch": 0.19, + "learning_rate": 4.991546692761801e-05, + "loss": 0.4578, + "step": 196, + "task_loss": 0.25695863366127014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7068189125386495, + "compression_loss": 0.0, + "distillation_loss": 0.8246874213218689, + "epoch": 0.19, + "learning_rate": 4.991458916825176e-05, + "loss": 0.7932, + "step": 197, + "task_loss": 0.5102267265319824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7068528830404883, + "compression_loss": 0.0, + "distillation_loss": 0.49332180619239807, + "epoch": 0.19, + "learning_rate": 4.991370688303039e-05, + "loss": 0.4696, + "step": 198, + "task_loss": 0.25608059763908386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7068868452850345, + "compression_loss": 0.0, + "distillation_loss": 0.7726448178291321, + "epoch": 0.19, + "learning_rate": 4.9912820072114185e-05, + "loss": 0.7387, + "step": 199, + "task_loss": 0.43347033858299255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7069207992732918, + "compression_loss": 0.0, + "distillation_loss": 0.3915247917175293, + "epoch": 0.19, + "learning_rate": 4.9911928735664224e-05, + "loss": 0.368, + "step": 200, + "task_loss": 0.1565304845571518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7069547450062639, + "compression_loss": 0.0, + "distillation_loss": 0.39359933137893677, + "epoch": 0.19, + "learning_rate": 4.991103287384244e-05, + "loss": 0.3707, + "step": 201, + "task_loss": 0.1646425426006317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7069886824849545, + "compression_loss": 0.0, + "distillation_loss": 0.30474036931991577, + "epoch": 0.19, + "learning_rate": 4.9910132486811555e-05, + "loss": 0.2895, + "step": 202, + "task_loss": 0.15248741209506989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7070226117103672, + "compression_loss": 0.0, + "distillation_loss": 0.5055716037750244, + "epoch": 0.19, + "learning_rate": 4.990922757473514e-05, + "loss": 0.4763, + "step": 203, + "task_loss": 0.21311217546463013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7070565326835058, + "compression_loss": 0.0, + "distillation_loss": 0.6619958877563477, + "epoch": 0.19, + "learning_rate": 4.990831813777757e-05, + "loss": 0.6328, + "step": 204, + "task_loss": 0.36974024772644043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7070904454053738, + "compression_loss": 0.0, + "distillation_loss": 0.809003472328186, + "epoch": 0.19, + "learning_rate": 4.990740417610406e-05, + "loss": 0.7783, + "step": 205, + "task_loss": 0.5022717714309692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.707124349876975, + "compression_loss": 0.0, + "distillation_loss": 0.9417250156402588, + "epoch": 0.2, + "learning_rate": 4.9906485689880613e-05, + "loss": 0.898, + "step": 206, + "task_loss": 0.5048108100891113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7071582460993132, + "compression_loss": 0.0, + "distillation_loss": 0.4026396870613098, + "epoch": 0.2, + "learning_rate": 4.9905562679274096e-05, + "loss": 0.3856, + "step": 207, + "task_loss": 0.23229239881038666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7071921340733919, + "compression_loss": 0.0, + "distillation_loss": 0.45894497632980347, + "epoch": 0.2, + "learning_rate": 4.9904635144452164e-05, + "loss": 0.4355, + "step": 208, + "task_loss": 0.22411760687828064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7072260138002149, + "compression_loss": 0.0, + "distillation_loss": 0.5892167091369629, + "epoch": 0.2, + "learning_rate": 4.990370308558332e-05, + "loss": 0.5551, + "step": 209, + "task_loss": 0.24835461378097534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7072598852807856, + "compression_loss": 0.0, + "distillation_loss": 0.5774100422859192, + "epoch": 0.2, + "learning_rate": 4.9902766502836874e-05, + "loss": 0.5463, + "step": 210, + "task_loss": 0.2667173445224762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7072937485161082, + "compression_loss": 0.0, + "distillation_loss": 0.33134809136390686, + "epoch": 0.2, + "learning_rate": 4.9901825396382965e-05, + "loss": 0.3096, + "step": 211, + "task_loss": 0.11349479109048843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7073276035071859, + "compression_loss": 0.0, + "distillation_loss": 0.46322399377822876, + "epoch": 0.2, + "learning_rate": 4.990087976639254e-05, + "loss": 0.4346, + "step": 212, + "task_loss": 0.17661762237548828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7073614502550227, + "compression_loss": 0.0, + "distillation_loss": 0.4679402709007263, + "epoch": 0.2, + "learning_rate": 4.989992961303738e-05, + "loss": 0.445, + "step": 213, + "task_loss": 0.23898877203464508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7073952887606221, + "compression_loss": 0.0, + "distillation_loss": 0.44021981954574585, + "epoch": 0.2, + "learning_rate": 4.989897493649008e-05, + "loss": 0.43, + "step": 214, + "task_loss": 0.3383277654647827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7074291190249877, + "compression_loss": 0.0, + "distillation_loss": 0.5056917071342468, + "epoch": 0.2, + "learning_rate": 4.989801573692408e-05, + "loss": 0.4787, + "step": 215, + "task_loss": 0.23537424206733704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7074629410491236, + "compression_loss": 0.0, + "distillation_loss": 0.7158606052398682, + "epoch": 0.21, + "learning_rate": 4.989705201451361e-05, + "loss": 0.6868, + "step": 216, + "task_loss": 0.4253765344619751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.707496754834033, + "compression_loss": 0.0, + "distillation_loss": 0.4985395669937134, + "epoch": 0.21, + "learning_rate": 4.989608376943373e-05, + "loss": 0.4697, + "step": 217, + "task_loss": 0.21007469296455383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7075305603807197, + "compression_loss": 0.0, + "distillation_loss": 0.5073490738868713, + "epoch": 0.21, + "learning_rate": 4.9895111001860335e-05, + "loss": 0.4779, + "step": 218, + "task_loss": 0.21258264780044556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7075643576901876, + "compression_loss": 0.0, + "distillation_loss": 0.37614893913269043, + "epoch": 0.21, + "learning_rate": 4.989413371197013e-05, + "loss": 0.3534, + "step": 219, + "task_loss": 0.14843641221523285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7075981467634402, + "compression_loss": 0.0, + "distillation_loss": 0.6342090368270874, + "epoch": 0.21, + "learning_rate": 4.989315189994065e-05, + "loss": 0.6009, + "step": 220, + "task_loss": 0.3015548586845398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7076319276014813, + "compression_loss": 0.0, + "distillation_loss": 0.7175476551055908, + "epoch": 0.21, + "learning_rate": 4.9892165565950235e-05, + "loss": 0.6756, + "step": 221, + "task_loss": 0.2982741594314575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7076657002053144, + "compression_loss": 0.0, + "distillation_loss": 0.40784966945648193, + "epoch": 0.21, + "learning_rate": 4.9891174710178054e-05, + "loss": 0.389, + "step": 222, + "task_loss": 0.21939508616924286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7076994645759433, + "compression_loss": 0.0, + "distillation_loss": 0.5739084482192993, + "epoch": 0.21, + "learning_rate": 4.9890179332804125e-05, + "loss": 0.5553, + "step": 223, + "task_loss": 0.38766252994537354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7077332207143717, + "compression_loss": 0.0, + "distillation_loss": 0.44305968284606934, + "epoch": 0.21, + "learning_rate": 4.988917943400924e-05, + "loss": 0.4178, + "step": 224, + "task_loss": 0.19036653637886047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7077669686216033, + "compression_loss": 0.0, + "distillation_loss": 0.714052140712738, + "epoch": 0.21, + "learning_rate": 4.988817501397505e-05, + "loss": 0.6902, + "step": 225, + "task_loss": 0.4752587080001831 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7078007082986417, + "compression_loss": 0.0, + "distillation_loss": 0.36877432465553284, + "epoch": 0.21, + "learning_rate": 4.9887166072884e-05, + "loss": 0.3439, + "step": 226, + "task_loss": 0.11986754834651947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7078344397464906, + "compression_loss": 0.0, + "distillation_loss": 0.5401185750961304, + "epoch": 0.22, + "learning_rate": 4.988615261091938e-05, + "loss": 0.5173, + "step": 227, + "task_loss": 0.31204986572265625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7078681629661537, + "compression_loss": 0.0, + "distillation_loss": 0.6624010801315308, + "epoch": 0.22, + "learning_rate": 4.9885134628265276e-05, + "loss": 0.6317, + "step": 228, + "task_loss": 0.3551117479801178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7079018779586348, + "compression_loss": 0.0, + "distillation_loss": 0.5041351318359375, + "epoch": 0.22, + "learning_rate": 4.988411212510663e-05, + "loss": 0.4863, + "step": 229, + "task_loss": 0.3257242739200592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7079355847249373, + "compression_loss": 0.0, + "distillation_loss": 0.5081358551979065, + "epoch": 0.22, + "learning_rate": 4.988308510162917e-05, + "loss": 0.4942, + "step": 230, + "task_loss": 0.36878862977027893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7079692832660651, + "compression_loss": 0.0, + "distillation_loss": 0.4084128737449646, + "epoch": 0.22, + "learning_rate": 4.988205355801945e-05, + "loss": 0.3874, + "step": 231, + "task_loss": 0.19869351387023926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7080029735830219, + "compression_loss": 0.0, + "distillation_loss": 0.6727085113525391, + "epoch": 0.22, + "learning_rate": 4.988101749446488e-05, + "loss": 0.6462, + "step": 232, + "task_loss": 0.40740063786506653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7080366556768113, + "compression_loss": 0.0, + "distillation_loss": 0.3574530780315399, + "epoch": 0.22, + "learning_rate": 4.987997691115366e-05, + "loss": 0.3417, + "step": 233, + "task_loss": 0.20038984715938568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.708070329548437, + "compression_loss": 0.0, + "distillation_loss": 0.6499674916267395, + "epoch": 0.22, + "learning_rate": 4.98789318082748e-05, + "loss": 0.6204, + "step": 234, + "task_loss": 0.3542225658893585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7081039951989025, + "compression_loss": 0.0, + "distillation_loss": 0.2822619080543518, + "epoch": 0.22, + "learning_rate": 4.987788218601816e-05, + "loss": 0.2671, + "step": 235, + "task_loss": 0.13111086189746857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7081376526292118, + "compression_loss": 0.0, + "distillation_loss": 0.3758002519607544, + "epoch": 0.22, + "learning_rate": 4.987682804457441e-05, + "loss": 0.3537, + "step": 236, + "task_loss": 0.15476220846176147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7081713018403685, + "compression_loss": 0.0, + "distillation_loss": 0.42200687527656555, + "epoch": 0.23, + "learning_rate": 4.987576938413504e-05, + "loss": 0.4044, + "step": 237, + "task_loss": 0.24637091159820557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7082049428333762, + "compression_loss": 0.0, + "distillation_loss": 0.3074836730957031, + "epoch": 0.23, + "learning_rate": 4.987470620489235e-05, + "loss": 0.2978, + "step": 238, + "task_loss": 0.21041469275951385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7082385756092385, + "compression_loss": 0.0, + "distillation_loss": 0.19442984461784363, + "epoch": 0.23, + "learning_rate": 4.9873638507039486e-05, + "loss": 0.1873, + "step": 239, + "task_loss": 0.12338165193796158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7082722001689592, + "compression_loss": 0.0, + "distillation_loss": 0.3584289252758026, + "epoch": 0.23, + "learning_rate": 4.987256629077039e-05, + "loss": 0.34, + "step": 240, + "task_loss": 0.17371408641338348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7083058165135421, + "compression_loss": 0.0, + "distillation_loss": 0.8099121451377869, + "epoch": 0.23, + "learning_rate": 4.987148955627985e-05, + "loss": 0.7633, + "step": 241, + "task_loss": 0.34426361322402954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7083394246439906, + "compression_loss": 0.0, + "distillation_loss": 0.293813019990921, + "epoch": 0.23, + "learning_rate": 4.987040830376344e-05, + "loss": 0.2808, + "step": 242, + "task_loss": 0.16348132491111755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7083730245613087, + "compression_loss": 0.0, + "distillation_loss": 0.5357306599617004, + "epoch": 0.23, + "learning_rate": 4.9869322533417596e-05, + "loss": 0.504, + "step": 243, + "task_loss": 0.21806874871253967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7084066162664997, + "compression_loss": 0.0, + "distillation_loss": 0.6220539212226868, + "epoch": 0.23, + "learning_rate": 4.9868232245439525e-05, + "loss": 0.5916, + "step": 244, + "task_loss": 0.3177708387374878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7084401997605676, + "compression_loss": 0.0, + "distillation_loss": 0.49369296431541443, + "epoch": 0.23, + "learning_rate": 4.986713744002731e-05, + "loss": 0.4649, + "step": 245, + "task_loss": 0.20582452416419983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.708473775044516, + "compression_loss": 0.0, + "distillation_loss": 0.46345996856689453, + "epoch": 0.23, + "learning_rate": 4.9866038117379824e-05, + "loss": 0.4486, + "step": 246, + "task_loss": 0.31531885266304016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7085073421193486, + "compression_loss": 0.0, + "distillation_loss": 0.32199227809906006, + "epoch": 0.23, + "learning_rate": 4.986493427769675e-05, + "loss": 0.3051, + "step": 247, + "task_loss": 0.15327921509742737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.708540900986069, + "compression_loss": 0.0, + "distillation_loss": 0.4429311156272888, + "epoch": 0.24, + "learning_rate": 4.986382592117861e-05, + "loss": 0.4221, + "step": 248, + "task_loss": 0.2345043569803238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7085744516456809, + "compression_loss": 0.0, + "distillation_loss": 0.47061067819595337, + "epoch": 0.24, + "learning_rate": 4.986271304802675e-05, + "loss": 0.4492, + "step": 249, + "task_loss": 0.2561434507369995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7086079940991881, + "compression_loss": 0.0, + "distillation_loss": 0.4610680341720581, + "epoch": 0.24, + "learning_rate": 4.986159565844333e-05, + "loss": 0.4471, + "step": 250, + "task_loss": 0.32182347774505615 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.8532110091743119, + "eval_loss": 0.6153932809829712, + "eval_runtime": 14.8163, + "eval_samples_per_second": 58.854, + "eval_steps_per_second": 7.357, + "step": 250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.708641528347594, + "compression_loss": 0.0, + "distillation_loss": 0.53682541847229, + "epoch": 0.24, + "learning_rate": 4.986047375263131e-05, + "loss": 0.513, + "step": 251, + "task_loss": 0.2982673645019531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7086750543919026, + "compression_loss": 0.0, + "distillation_loss": 0.29910629987716675, + "epoch": 0.24, + "learning_rate": 4.9859347330794515e-05, + "loss": 0.2856, + "step": 252, + "task_loss": 0.16408580541610718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7087085722331175, + "compression_loss": 0.0, + "distillation_loss": 0.49715662002563477, + "epoch": 0.24, + "learning_rate": 4.985821639313755e-05, + "loss": 0.4719, + "step": 253, + "task_loss": 0.24493193626403809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7087420818722422, + "compression_loss": 0.0, + "distillation_loss": 0.6056504249572754, + "epoch": 0.24, + "learning_rate": 4.985708093986586e-05, + "loss": 0.5746, + "step": 254, + "task_loss": 0.29507917165756226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7087755833102806, + "compression_loss": 0.0, + "distillation_loss": 0.5323020219802856, + "epoch": 0.24, + "learning_rate": 4.98559409711857e-05, + "loss": 0.5011, + "step": 255, + "task_loss": 0.2197890430688858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7088090765482363, + "compression_loss": 0.0, + "distillation_loss": 0.5494606494903564, + "epoch": 0.24, + "learning_rate": 4.985479648730416e-05, + "loss": 0.5286, + "step": 256, + "task_loss": 0.3407706320285797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.708842561587113, + "compression_loss": 0.0, + "distillation_loss": 0.35420656204223633, + "epoch": 0.24, + "learning_rate": 4.985364748842914e-05, + "loss": 0.3456, + "step": 257, + "task_loss": 0.2684999108314514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7088760384279144, + "compression_loss": 0.0, + "distillation_loss": 0.4922611713409424, + "epoch": 0.25, + "learning_rate": 4.985249397476934e-05, + "loss": 0.4717, + "step": 258, + "task_loss": 0.2866893410682678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.708909507071644, + "compression_loss": 0.0, + "distillation_loss": 0.34109288454055786, + "epoch": 0.25, + "learning_rate": 4.985133594653434e-05, + "loss": 0.3198, + "step": 259, + "task_loss": 0.12784980237483978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7089429675193057, + "compression_loss": 0.0, + "distillation_loss": 0.3204500675201416, + "epoch": 0.25, + "learning_rate": 4.9850173403934466e-05, + "loss": 0.3082, + "step": 260, + "task_loss": 0.198293536901474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7089764197719032, + "compression_loss": 0.0, + "distillation_loss": 0.5372319221496582, + "epoch": 0.25, + "learning_rate": 4.9849006347180915e-05, + "loss": 0.5081, + "step": 261, + "task_loss": 0.24620920419692993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.70900986383044, + "compression_loss": 0.0, + "distillation_loss": 0.31807684898376465, + "epoch": 0.25, + "learning_rate": 4.9847834776485694e-05, + "loss": 0.3002, + "step": 262, + "task_loss": 0.13968217372894287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7090432996959198, + "compression_loss": 0.0, + "distillation_loss": 0.3310486078262329, + "epoch": 0.25, + "learning_rate": 4.984665869206161e-05, + "loss": 0.3205, + "step": 263, + "task_loss": 0.22510308027267456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7090767273693465, + "compression_loss": 0.0, + "distillation_loss": 0.15543615818023682, + "epoch": 0.25, + "learning_rate": 4.984547809412231e-05, + "loss": 0.15, + "step": 264, + "task_loss": 0.10149666666984558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7091101468517236, + "compression_loss": 0.0, + "distillation_loss": 0.4099940061569214, + "epoch": 0.25, + "learning_rate": 4.984429298288227e-05, + "loss": 0.385, + "step": 265, + "task_loss": 0.1603960394859314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7091435581440548, + "compression_loss": 0.0, + "distillation_loss": 0.6284467577934265, + "epoch": 0.25, + "learning_rate": 4.984310335855674e-05, + "loss": 0.5938, + "step": 266, + "task_loss": 0.28220003843307495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7091769612473438, + "compression_loss": 0.0, + "distillation_loss": 0.4136122167110443, + "epoch": 0.25, + "learning_rate": 4.9841909221361855e-05, + "loss": 0.3988, + "step": 267, + "task_loss": 0.26578381657600403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7092103561625943, + "compression_loss": 0.0, + "distillation_loss": 0.5570213794708252, + "epoch": 0.25, + "learning_rate": 4.9840710571514515e-05, + "loss": 0.5466, + "step": 268, + "task_loss": 0.4525538682937622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7092437428908099, + "compression_loss": 0.0, + "distillation_loss": 0.4353582561016083, + "epoch": 0.26, + "learning_rate": 4.9839507409232464e-05, + "loss": 0.4121, + "step": 269, + "task_loss": 0.20239636301994324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7092771214329945, + "compression_loss": 0.0, + "distillation_loss": 0.5894170999526978, + "epoch": 0.26, + "learning_rate": 4.983829973473426e-05, + "loss": 0.5694, + "step": 270, + "task_loss": 0.38912904262542725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7093104917901515, + "compression_loss": 0.0, + "distillation_loss": 0.5201334953308105, + "epoch": 0.26, + "learning_rate": 4.983708754823929e-05, + "loss": 0.4966, + "step": 271, + "task_loss": 0.284492552280426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7093438539632848, + "compression_loss": 0.0, + "distillation_loss": 0.4675508737564087, + "epoch": 0.26, + "learning_rate": 4.983587084996776e-05, + "loss": 0.4469, + "step": 272, + "task_loss": 0.26147744059562683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.709377207953398, + "compression_loss": 0.0, + "distillation_loss": 0.45866286754608154, + "epoch": 0.26, + "learning_rate": 4.9834649640140664e-05, + "loss": 0.4508, + "step": 273, + "task_loss": 0.37955737113952637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7094105537614946, + "compression_loss": 0.0, + "distillation_loss": 0.4817972183227539, + "epoch": 0.26, + "learning_rate": 4.9833423918979864e-05, + "loss": 0.4607, + "step": 274, + "task_loss": 0.2709713578224182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7094438913885787, + "compression_loss": 0.0, + "distillation_loss": 0.3562793433666229, + "epoch": 0.26, + "learning_rate": 4.983219368670801e-05, + "loss": 0.3413, + "step": 275, + "task_loss": 0.20625412464141846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7094772208356536, + "compression_loss": 0.0, + "distillation_loss": 0.45571157336235046, + "epoch": 0.26, + "learning_rate": 4.983095894354858e-05, + "loss": 0.4336, + "step": 276, + "task_loss": 0.2345903366804123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7095105421037231, + "compression_loss": 0.0, + "distillation_loss": 0.22800546884536743, + "epoch": 0.26, + "learning_rate": 4.9829719689725865e-05, + "loss": 0.2207, + "step": 277, + "task_loss": 0.1548459529876709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.709543855193791, + "compression_loss": 0.0, + "distillation_loss": 0.5845510959625244, + "epoch": 0.26, + "learning_rate": 4.982847592546499e-05, + "loss": 0.564, + "step": 278, + "task_loss": 0.3790714740753174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7095771601068609, + "compression_loss": 0.0, + "distillation_loss": 0.2517128884792328, + "epoch": 0.26, + "learning_rate": 4.982722765099189e-05, + "loss": 0.2349, + "step": 279, + "task_loss": 0.08311676234006882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7096104568439363, + "compression_loss": 0.0, + "distillation_loss": 0.4391753077507019, + "epoch": 0.27, + "learning_rate": 4.982597486653332e-05, + "loss": 0.4248, + "step": 280, + "task_loss": 0.29559123516082764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7096437454060213, + "compression_loss": 0.0, + "distillation_loss": 0.5961909294128418, + "epoch": 0.27, + "learning_rate": 4.982471757231685e-05, + "loss": 0.5709, + "step": 281, + "task_loss": 0.3435242772102356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7096770257941192, + "compression_loss": 0.0, + "distillation_loss": 0.3963918089866638, + "epoch": 0.27, + "learning_rate": 4.982345576857087e-05, + "loss": 0.3802, + "step": 282, + "task_loss": 0.23463018238544464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7097102980092338, + "compression_loss": 0.0, + "distillation_loss": 0.40490058064460754, + "epoch": 0.27, + "learning_rate": 4.9822189455524604e-05, + "loss": 0.3843, + "step": 283, + "task_loss": 0.1989540457725525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7097435620523688, + "compression_loss": 0.0, + "distillation_loss": 0.40912073850631714, + "epoch": 0.27, + "learning_rate": 4.982091863340808e-05, + "loss": 0.3978, + "step": 284, + "task_loss": 0.29543089866638184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7097768179245278, + "compression_loss": 0.0, + "distillation_loss": 0.5951185822486877, + "epoch": 0.27, + "learning_rate": 4.9819643302452146e-05, + "loss": 0.5699, + "step": 285, + "task_loss": 0.3433946371078491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7098100656267148, + "compression_loss": 0.0, + "distillation_loss": 0.45149531960487366, + "epoch": 0.27, + "learning_rate": 4.981836346288847e-05, + "loss": 0.4407, + "step": 286, + "task_loss": 0.3439605236053467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7098433051599331, + "compression_loss": 0.0, + "distillation_loss": 0.9340552687644958, + "epoch": 0.27, + "learning_rate": 4.981707911494955e-05, + "loss": 0.8901, + "step": 287, + "task_loss": 0.4947472810745239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7098765365251865, + "compression_loss": 0.0, + "distillation_loss": 0.40463870763778687, + "epoch": 0.27, + "learning_rate": 4.981579025886868e-05, + "loss": 0.379, + "step": 288, + "task_loss": 0.14829692244529724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7099097597234787, + "compression_loss": 0.0, + "distillation_loss": 0.14002841711044312, + "epoch": 0.27, + "learning_rate": 4.981449689488e-05, + "loss": 0.1338, + "step": 289, + "task_loss": 0.07818258553743362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7099429747558135, + "compression_loss": 0.0, + "distillation_loss": 0.45351576805114746, + "epoch": 0.28, + "learning_rate": 4.981319902321846e-05, + "loss": 0.4276, + "step": 290, + "task_loss": 0.1947600394487381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7099761816231944, + "compression_loss": 0.0, + "distillation_loss": 0.4904792308807373, + "epoch": 0.28, + "learning_rate": 4.981189664411981e-05, + "loss": 0.4701, + "step": 291, + "task_loss": 0.28627243638038635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7100093803266252, + "compression_loss": 0.0, + "distillation_loss": 0.6285742521286011, + "epoch": 0.28, + "learning_rate": 4.981058975782063e-05, + "loss": 0.5967, + "step": 292, + "task_loss": 0.31022369861602783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7100425708671094, + "compression_loss": 0.0, + "distillation_loss": 0.7875237464904785, + "epoch": 0.28, + "learning_rate": 4.9809278364558336e-05, + "loss": 0.7486, + "step": 293, + "task_loss": 0.39871400594711304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.710075753245651, + "compression_loss": 0.0, + "distillation_loss": 0.5977748036384583, + "epoch": 0.28, + "learning_rate": 4.980796246457115e-05, + "loss": 0.5728, + "step": 294, + "task_loss": 0.3479851484298706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7101089274632534, + "compression_loss": 0.0, + "distillation_loss": 0.5126686096191406, + "epoch": 0.28, + "learning_rate": 4.9806642058098105e-05, + "loss": 0.4815, + "step": 295, + "task_loss": 0.20053212344646454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7101420935209204, + "compression_loss": 0.0, + "distillation_loss": 0.5196264982223511, + "epoch": 0.28, + "learning_rate": 4.980531714537905e-05, + "loss": 0.4878, + "step": 296, + "task_loss": 0.2012956738471985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7101752514196557, + "compression_loss": 0.0, + "distillation_loss": 0.33446091413497925, + "epoch": 0.28, + "learning_rate": 4.980398772665468e-05, + "loss": 0.3161, + "step": 297, + "task_loss": 0.15055128931999207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7102084011604629, + "compression_loss": 0.0, + "distillation_loss": 0.6768823862075806, + "epoch": 0.28, + "learning_rate": 4.980265380216649e-05, + "loss": 0.6398, + "step": 298, + "task_loss": 0.30652397871017456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7102415427443457, + "compression_loss": 0.0, + "distillation_loss": 0.3801252245903015, + "epoch": 0.28, + "learning_rate": 4.9801315372156775e-05, + "loss": 0.3637, + "step": 299, + "task_loss": 0.21593311429023743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7102746761723079, + "compression_loss": 0.0, + "distillation_loss": 0.3485510051250458, + "epoch": 0.28, + "learning_rate": 4.979997243686868e-05, + "loss": 0.3279, + "step": 300, + "task_loss": 0.14158813655376434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7103078014453531, + "compression_loss": 0.0, + "distillation_loss": 0.4838918447494507, + "epoch": 0.29, + "learning_rate": 4.979862499654615e-05, + "loss": 0.4596, + "step": 301, + "task_loss": 0.24049346148967743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7103409185644849, + "compression_loss": 0.0, + "distillation_loss": 0.5298963785171509, + "epoch": 0.29, + "learning_rate": 4.9797273051433966e-05, + "loss": 0.4982, + "step": 302, + "task_loss": 0.21265023946762085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7103740275307071, + "compression_loss": 0.0, + "distillation_loss": 0.1947401762008667, + "epoch": 0.29, + "learning_rate": 4.97959166017777e-05, + "loss": 0.1844, + "step": 303, + "task_loss": 0.0909617617726326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7104071283450233, + "compression_loss": 0.0, + "distillation_loss": 0.3400229215621948, + "epoch": 0.29, + "learning_rate": 4.979455564782377e-05, + "loss": 0.3316, + "step": 304, + "task_loss": 0.2561742961406708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7104402210084373, + "compression_loss": 0.0, + "distillation_loss": 0.33108600974082947, + "epoch": 0.29, + "learning_rate": 4.9793190189819395e-05, + "loss": 0.3223, + "step": 305, + "task_loss": 0.24314963817596436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7104733055219526, + "compression_loss": 0.0, + "distillation_loss": 0.270094633102417, + "epoch": 0.29, + "learning_rate": 4.979182022801262e-05, + "loss": 0.2599, + "step": 306, + "task_loss": 0.1684069037437439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7105063818865731, + "compression_loss": 0.0, + "distillation_loss": 0.3968997001647949, + "epoch": 0.29, + "learning_rate": 4.979044576265229e-05, + "loss": 0.3775, + "step": 307, + "task_loss": 0.203133687376976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7105394501033022, + "compression_loss": 0.0, + "distillation_loss": 0.4618909955024719, + "epoch": 0.29, + "learning_rate": 4.9789066793988106e-05, + "loss": 0.4387, + "step": 308, + "task_loss": 0.23047326505184174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7105725101731439, + "compression_loss": 0.0, + "distillation_loss": 0.4338553547859192, + "epoch": 0.29, + "learning_rate": 4.978768332227054e-05, + "loss": 0.4193, + "step": 309, + "task_loss": 0.2878293991088867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7106055620971018, + "compression_loss": 0.0, + "distillation_loss": 0.6526356935501099, + "epoch": 0.29, + "learning_rate": 4.9786295347750936e-05, + "loss": 0.6164, + "step": 310, + "task_loss": 0.29025229811668396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7106386058761793, + "compression_loss": 0.0, + "distillation_loss": 0.7481920123100281, + "epoch": 0.3, + "learning_rate": 4.9784902870681406e-05, + "loss": 0.7157, + "step": 311, + "task_loss": 0.4227951765060425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7106716415113805, + "compression_loss": 0.0, + "distillation_loss": 0.679728627204895, + "epoch": 0.3, + "learning_rate": 4.97835058913149e-05, + "loss": 0.6661, + "step": 312, + "task_loss": 0.5437166690826416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7107046690037088, + "compression_loss": 0.0, + "distillation_loss": 0.6855102777481079, + "epoch": 0.3, + "learning_rate": 4.9782104409905186e-05, + "loss": 0.6504, + "step": 313, + "task_loss": 0.33481907844543457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.710737688354168, + "compression_loss": 0.0, + "distillation_loss": 0.4069738984107971, + "epoch": 0.3, + "learning_rate": 4.9780698426706864e-05, + "loss": 0.3808, + "step": 314, + "task_loss": 0.14478835463523865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7107706995637616, + "compression_loss": 0.0, + "distillation_loss": 0.6615716218948364, + "epoch": 0.3, + "learning_rate": 4.977928794197532e-05, + "loss": 0.6309, + "step": 315, + "task_loss": 0.3549352288246155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7108037026334935, + "compression_loss": 0.0, + "distillation_loss": 0.5655452609062195, + "epoch": 0.3, + "learning_rate": 4.9777872955966785e-05, + "loss": 0.5316, + "step": 316, + "task_loss": 0.22563889622688293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7108366975643673, + "compression_loss": 0.0, + "distillation_loss": 0.3800777792930603, + "epoch": 0.3, + "learning_rate": 4.97764534689383e-05, + "loss": 0.3621, + "step": 317, + "task_loss": 0.20060645043849945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7108696843573867, + "compression_loss": 0.0, + "distillation_loss": 0.2623631954193115, + "epoch": 0.3, + "learning_rate": 4.977502948114772e-05, + "loss": 0.2563, + "step": 318, + "task_loss": 0.20183011889457703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7109026630135554, + "compression_loss": 0.0, + "distillation_loss": 0.4012034833431244, + "epoch": 0.3, + "learning_rate": 4.977360099285371e-05, + "loss": 0.3783, + "step": 319, + "task_loss": 0.17202433943748474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7109356335338771, + "compression_loss": 0.0, + "distillation_loss": 0.35331350564956665, + "epoch": 0.3, + "learning_rate": 4.9772168004315765e-05, + "loss": 0.3308, + "step": 320, + "task_loss": 0.12813310325145721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7109685959193554, + "compression_loss": 0.0, + "distillation_loss": 0.3918440043926239, + "epoch": 0.3, + "learning_rate": 4.9770730515794204e-05, + "loss": 0.3696, + "step": 321, + "task_loss": 0.16927433013916016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.711001550170994, + "compression_loss": 0.0, + "distillation_loss": 0.3784354627132416, + "epoch": 0.31, + "learning_rate": 4.976928852755015e-05, + "loss": 0.3608, + "step": 322, + "task_loss": 0.2020285427570343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7110344962897965, + "compression_loss": 0.0, + "distillation_loss": 0.5241067409515381, + "epoch": 0.31, + "learning_rate": 4.976784203984554e-05, + "loss": 0.501, + "step": 323, + "task_loss": 0.2931115925312042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7110674342767669, + "compression_loss": 0.0, + "distillation_loss": 0.3190663456916809, + "epoch": 0.31, + "learning_rate": 4.976639105294314e-05, + "loss": 0.308, + "step": 324, + "task_loss": 0.2081044614315033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7111003641329084, + "compression_loss": 0.0, + "distillation_loss": 0.3578122854232788, + "epoch": 0.31, + "learning_rate": 4.976493556710653e-05, + "loss": 0.3369, + "step": 325, + "task_loss": 0.14846715331077576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7111332858592252, + "compression_loss": 0.0, + "distillation_loss": 0.4352418780326843, + "epoch": 0.31, + "learning_rate": 4.976347558260011e-05, + "loss": 0.4134, + "step": 326, + "task_loss": 0.21667122840881348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7111661994567205, + "compression_loss": 0.0, + "distillation_loss": 0.36462095379829407, + "epoch": 0.31, + "learning_rate": 4.976201109968908e-05, + "loss": 0.3506, + "step": 327, + "task_loss": 0.2242008000612259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7111991049263984, + "compression_loss": 0.0, + "distillation_loss": 0.5992603302001953, + "epoch": 0.31, + "learning_rate": 4.976054211863949e-05, + "loss": 0.5782, + "step": 328, + "task_loss": 0.38846805691719055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7112320022692623, + "compression_loss": 0.0, + "distillation_loss": 0.1498991847038269, + "epoch": 0.31, + "learning_rate": 4.9759068639718166e-05, + "loss": 0.1485, + "step": 329, + "task_loss": 0.13616728782653809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.711264891486316, + "compression_loss": 0.0, + "distillation_loss": 0.7137056589126587, + "epoch": 0.31, + "learning_rate": 4.975759066319278e-05, + "loss": 0.6852, + "step": 330, + "task_loss": 0.4287063479423523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7112977725785632, + "compression_loss": 0.0, + "distillation_loss": 0.3997446298599243, + "epoch": 0.31, + "learning_rate": 4.9756108189331825e-05, + "loss": 0.385, + "step": 331, + "task_loss": 0.2526022791862488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7113306455470074, + "compression_loss": 0.0, + "distillation_loss": 0.6099421977996826, + "epoch": 0.32, + "learning_rate": 4.975462121840458e-05, + "loss": 0.5846, + "step": 332, + "task_loss": 0.35605889558792114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7113635103926526, + "compression_loss": 0.0, + "distillation_loss": 0.3887147605419159, + "epoch": 0.32, + "learning_rate": 4.975312975068118e-05, + "loss": 0.3771, + "step": 333, + "task_loss": 0.27239084243774414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7113963671165022, + "compression_loss": 0.0, + "distillation_loss": 0.3457412123680115, + "epoch": 0.32, + "learning_rate": 4.975163378643255e-05, + "loss": 0.3356, + "step": 334, + "task_loss": 0.2438831776380539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7114292157195601, + "compression_loss": 0.0, + "distillation_loss": 0.26972734928131104, + "epoch": 0.32, + "learning_rate": 4.975013332593044e-05, + "loss": 0.2563, + "step": 335, + "task_loss": 0.13514116406440735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7114620562028297, + "compression_loss": 0.0, + "distillation_loss": 0.269406259059906, + "epoch": 0.32, + "learning_rate": 4.97486283694474e-05, + "loss": 0.2526, + "step": 336, + "task_loss": 0.10177846252918243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7114948885673149, + "compression_loss": 0.0, + "distillation_loss": 0.5931138396263123, + "epoch": 0.32, + "learning_rate": 4.974711891725684e-05, + "loss": 0.5679, + "step": 337, + "task_loss": 0.34124326705932617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7115277128140194, + "compression_loss": 0.0, + "distillation_loss": 0.12281601130962372, + "epoch": 0.32, + "learning_rate": 4.9745604969632934e-05, + "loss": 0.1292, + "step": 338, + "task_loss": 0.18707461655139923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7115605289439467, + "compression_loss": 0.0, + "distillation_loss": 0.6009297966957092, + "epoch": 0.32, + "learning_rate": 4.974408652685072e-05, + "loss": 0.5704, + "step": 339, + "task_loss": 0.296112984418869 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7115933369581007, + "compression_loss": 0.0, + "distillation_loss": 0.6147419214248657, + "epoch": 0.32, + "learning_rate": 4.974256358918601e-05, + "loss": 0.586, + "step": 340, + "task_loss": 0.3271123766899109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7116261368574849, + "compression_loss": 0.0, + "distillation_loss": 0.5920723080635071, + "epoch": 0.32, + "learning_rate": 4.9741036156915464e-05, + "loss": 0.565, + "step": 341, + "task_loss": 0.32182377576828003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7116589286431031, + "compression_loss": 0.0, + "distillation_loss": 0.2902904450893402, + "epoch": 0.32, + "learning_rate": 4.973950423031655e-05, + "loss": 0.2809, + "step": 342, + "task_loss": 0.19625771045684814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.711691712315959, + "compression_loss": 0.0, + "distillation_loss": 0.674541711807251, + "epoch": 0.33, + "learning_rate": 4.9737967809667546e-05, + "loss": 0.6477, + "step": 343, + "task_loss": 0.4057275056838989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7117244878770561, + "compression_loss": 0.0, + "distillation_loss": 0.18200403451919556, + "epoch": 0.33, + "learning_rate": 4.9736426895247545e-05, + "loss": 0.1747, + "step": 344, + "task_loss": 0.10881594568490982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7117572553273983, + "compression_loss": 0.0, + "distillation_loss": 0.5339623093605042, + "epoch": 0.33, + "learning_rate": 4.973488148733647e-05, + "loss": 0.5104, + "step": 345, + "task_loss": 0.29824596643447876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7117900146679892, + "compression_loss": 0.0, + "distillation_loss": 0.5717617273330688, + "epoch": 0.33, + "learning_rate": 4.973333158621505e-05, + "loss": 0.553, + "step": 346, + "task_loss": 0.3842325806617737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7118227658998324, + "compression_loss": 0.0, + "distillation_loss": 0.4612230062484741, + "epoch": 0.33, + "learning_rate": 4.973177719216483e-05, + "loss": 0.4341, + "step": 347, + "task_loss": 0.1896570473909378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7118555090239316, + "compression_loss": 0.0, + "distillation_loss": 0.383368581533432, + "epoch": 0.33, + "learning_rate": 4.973021830546817e-05, + "loss": 0.3685, + "step": 348, + "task_loss": 0.23502130806446075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7118882440412907, + "compression_loss": 0.0, + "distillation_loss": 0.33486291766166687, + "epoch": 0.33, + "learning_rate": 4.972865492640826e-05, + "loss": 0.3254, + "step": 349, + "task_loss": 0.24004624783992767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7119209709529131, + "compression_loss": 0.0, + "distillation_loss": 0.37039846181869507, + "epoch": 0.33, + "learning_rate": 4.972708705526908e-05, + "loss": 0.3512, + "step": 350, + "task_loss": 0.1785244345664978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7119536897598027, + "compression_loss": 0.0, + "distillation_loss": 0.4161568284034729, + "epoch": 0.33, + "learning_rate": 4.972551469233545e-05, + "loss": 0.3904, + "step": 351, + "task_loss": 0.15905624628067017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7119864004629629, + "compression_loss": 0.0, + "distillation_loss": 0.4145691394805908, + "epoch": 0.33, + "learning_rate": 4.9723937837892996e-05, + "loss": 0.3912, + "step": 352, + "task_loss": 0.18065348267555237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7120191030633978, + "compression_loss": 0.0, + "distillation_loss": 0.262037992477417, + "epoch": 0.34, + "learning_rate": 4.972235649222817e-05, + "loss": 0.2493, + "step": 353, + "task_loss": 0.13479451835155487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7120517975621107, + "compression_loss": 0.0, + "distillation_loss": 0.3872963786125183, + "epoch": 0.34, + "learning_rate": 4.972077065562821e-05, + "loss": 0.3693, + "step": 354, + "task_loss": 0.2070463001728058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7120844839601054, + "compression_loss": 0.0, + "distillation_loss": 0.48908573389053345, + "epoch": 0.34, + "learning_rate": 4.971918032838122e-05, + "loss": 0.4654, + "step": 355, + "task_loss": 0.2519568204879761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7121171622583856, + "compression_loss": 0.0, + "distillation_loss": 0.15656299889087677, + "epoch": 0.34, + "learning_rate": 4.9717585510776065e-05, + "loss": 0.1556, + "step": 356, + "task_loss": 0.1469275802373886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.712149832457955, + "compression_loss": 0.0, + "distillation_loss": 0.42582622170448303, + "epoch": 0.34, + "learning_rate": 4.971598620310246e-05, + "loss": 0.4109, + "step": 357, + "task_loss": 0.27654868364334106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7121824945598174, + "compression_loss": 0.0, + "distillation_loss": 0.4828101396560669, + "epoch": 0.34, + "learning_rate": 4.9714382405650926e-05, + "loss": 0.4592, + "step": 358, + "task_loss": 0.24672147631645203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7122151485649763, + "compression_loss": 0.0, + "distillation_loss": 0.2761083245277405, + "epoch": 0.34, + "learning_rate": 4.971277411871281e-05, + "loss": 0.266, + "step": 359, + "task_loss": 0.17460951209068298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7122477944744354, + "compression_loss": 0.0, + "distillation_loss": 0.5871754884719849, + "epoch": 0.34, + "learning_rate": 4.971116134258025e-05, + "loss": 0.5566, + "step": 360, + "task_loss": 0.28161656856536865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7122804322891985, + "compression_loss": 0.0, + "distillation_loss": 0.4658134877681732, + "epoch": 0.34, + "learning_rate": 4.9709544077546235e-05, + "loss": 0.4412, + "step": 361, + "task_loss": 0.21951305866241455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7123130620102691, + "compression_loss": 0.0, + "distillation_loss": 0.4061258137226105, + "epoch": 0.34, + "learning_rate": 4.9707922323904524e-05, + "loss": 0.3812, + "step": 362, + "task_loss": 0.1569288671016693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.712345683638651, + "compression_loss": 0.0, + "distillation_loss": 0.4409645199775696, + "epoch": 0.34, + "learning_rate": 4.9706296081949724e-05, + "loss": 0.4208, + "step": 363, + "task_loss": 0.23960046470165253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7123782971753478, + "compression_loss": 0.0, + "distillation_loss": 0.13879182934761047, + "epoch": 0.35, + "learning_rate": 4.9704665351977266e-05, + "loss": 0.1371, + "step": 364, + "task_loss": 0.12139101326465607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7124109026213634, + "compression_loss": 0.0, + "distillation_loss": 0.5069670677185059, + "epoch": 0.35, + "learning_rate": 4.9703030134283356e-05, + "loss": 0.4817, + "step": 365, + "task_loss": 0.25386273860931396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7124434999777013, + "compression_loss": 0.0, + "distillation_loss": 0.46004921197891235, + "epoch": 0.35, + "learning_rate": 4.970139042916506e-05, + "loss": 0.4276, + "step": 366, + "task_loss": 0.13600590825080872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7124760892453651, + "compression_loss": 0.0, + "distillation_loss": 0.47816845774650574, + "epoch": 0.35, + "learning_rate": 4.969974623692023e-05, + "loss": 0.4559, + "step": 367, + "task_loss": 0.2554030418395996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7125086704253587, + "compression_loss": 0.0, + "distillation_loss": 0.5051754117012024, + "epoch": 0.35, + "learning_rate": 4.969809755784753e-05, + "loss": 0.4801, + "step": 368, + "task_loss": 0.2545245885848999 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7125412435186856, + "compression_loss": 0.0, + "distillation_loss": 0.5262600183486938, + "epoch": 0.35, + "learning_rate": 4.969644439224647e-05, + "loss": 0.5022, + "step": 369, + "task_loss": 0.28538602590560913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7125738085263497, + "compression_loss": 0.0, + "distillation_loss": 0.32377320528030396, + "epoch": 0.35, + "learning_rate": 4.969478674041735e-05, + "loss": 0.3042, + "step": 370, + "task_loss": 0.12757205963134766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7126063654493544, + "compression_loss": 0.0, + "distillation_loss": 0.31874191761016846, + "epoch": 0.35, + "learning_rate": 4.969312460266128e-05, + "loss": 0.3064, + "step": 371, + "task_loss": 0.19505202770233154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7126389142887035, + "compression_loss": 0.0, + "distillation_loss": 0.3652954697608948, + "epoch": 0.35, + "learning_rate": 4.969145797928021e-05, + "loss": 0.3444, + "step": 372, + "task_loss": 0.156343013048172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7126714550454009, + "compression_loss": 0.0, + "distillation_loss": 0.19748246669769287, + "epoch": 0.35, + "learning_rate": 4.968978687057687e-05, + "loss": 0.1868, + "step": 373, + "task_loss": 0.09101182222366333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7127039877204498, + "compression_loss": 0.0, + "distillation_loss": 0.5082756876945496, + "epoch": 0.36, + "learning_rate": 4.9688111276854846e-05, + "loss": 0.4828, + "step": 374, + "task_loss": 0.2530589699745178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7127365123148545, + "compression_loss": 0.0, + "distillation_loss": 0.48118260502815247, + "epoch": 0.36, + "learning_rate": 4.9686431198418515e-05, + "loss": 0.4566, + "step": 375, + "task_loss": 0.23539604246616364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7127690288296181, + "compression_loss": 0.0, + "distillation_loss": 0.43502384424209595, + "epoch": 0.36, + "learning_rate": 4.968474663557306e-05, + "loss": 0.4128, + "step": 376, + "task_loss": 0.21273593604564667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7128015372657446, + "compression_loss": 0.0, + "distillation_loss": 0.4908156096935272, + "epoch": 0.36, + "learning_rate": 4.9683057588624494e-05, + "loss": 0.4682, + "step": 377, + "task_loss": 0.26497960090637207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7128340376242377, + "compression_loss": 0.0, + "distillation_loss": 0.5738304853439331, + "epoch": 0.36, + "learning_rate": 4.968136405787964e-05, + "loss": 0.5737, + "step": 378, + "task_loss": 0.5724409818649292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7128665299061009, + "compression_loss": 0.0, + "distillation_loss": 0.31743335723876953, + "epoch": 0.36, + "learning_rate": 4.967966604364614e-05, + "loss": 0.3031, + "step": 379, + "task_loss": 0.17429381608963013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.712899014112338, + "compression_loss": 0.0, + "distillation_loss": 0.6811038851737976, + "epoch": 0.36, + "learning_rate": 4.9677963546232445e-05, + "loss": 0.651, + "step": 380, + "task_loss": 0.3800850212574005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7129314902439526, + "compression_loss": 0.0, + "distillation_loss": 0.5749964714050293, + "epoch": 0.36, + "learning_rate": 4.967625656594782e-05, + "loss": 0.5545, + "step": 381, + "task_loss": 0.3700599670410156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7129639583019486, + "compression_loss": 0.0, + "distillation_loss": 0.5684269666671753, + "epoch": 0.36, + "learning_rate": 4.967454510310235e-05, + "loss": 0.5486, + "step": 382, + "task_loss": 0.3700346052646637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7129964182873294, + "compression_loss": 0.0, + "distillation_loss": 0.42811310291290283, + "epoch": 0.36, + "learning_rate": 4.967282915800693e-05, + "loss": 0.4135, + "step": 383, + "task_loss": 0.2819935083389282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7130288702010988, + "compression_loss": 0.0, + "distillation_loss": 0.5204276442527771, + "epoch": 0.36, + "learning_rate": 4.9671108730973274e-05, + "loss": 0.4932, + "step": 384, + "task_loss": 0.24831140041351318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7130613140442605, + "compression_loss": 0.0, + "distillation_loss": 0.33399271965026855, + "epoch": 0.37, + "learning_rate": 4.9669383822313886e-05, + "loss": 0.3222, + "step": 385, + "task_loss": 0.21643346548080444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7130937498178183, + "compression_loss": 0.0, + "distillation_loss": 0.318186491727829, + "epoch": 0.37, + "learning_rate": 4.966765443234212e-05, + "loss": 0.3105, + "step": 386, + "task_loss": 0.24149882793426514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7131261775227756, + "compression_loss": 0.0, + "distillation_loss": 0.20772957801818848, + "epoch": 0.37, + "learning_rate": 4.966592056137213e-05, + "loss": 0.2072, + "step": 387, + "task_loss": 0.20199303328990936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7131585971601364, + "compression_loss": 0.0, + "distillation_loss": 0.29850098490715027, + "epoch": 0.37, + "learning_rate": 4.966418220971888e-05, + "loss": 0.2825, + "step": 388, + "task_loss": 0.13806317746639252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.713191008730904, + "compression_loss": 0.0, + "distillation_loss": 0.555107831954956, + "epoch": 0.37, + "learning_rate": 4.9662439377698145e-05, + "loss": 0.5276, + "step": 389, + "task_loss": 0.2797929644584656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7132234122360824, + "compression_loss": 0.0, + "distillation_loss": 0.31871169805526733, + "epoch": 0.37, + "learning_rate": 4.9660692065626515e-05, + "loss": 0.3048, + "step": 390, + "task_loss": 0.17996791005134583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7132558076766753, + "compression_loss": 0.0, + "distillation_loss": 0.34103503823280334, + "epoch": 0.37, + "learning_rate": 4.965894027382141e-05, + "loss": 0.3362, + "step": 391, + "task_loss": 0.29244738817214966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7132881950536861, + "compression_loss": 0.0, + "distillation_loss": 0.42130306363105774, + "epoch": 0.37, + "learning_rate": 4.965718400260105e-05, + "loss": 0.4119, + "step": 392, + "task_loss": 0.32684704661369324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7133205743681188, + "compression_loss": 0.0, + "distillation_loss": 0.252642959356308, + "epoch": 0.37, + "learning_rate": 4.965542325228446e-05, + "loss": 0.2442, + "step": 393, + "task_loss": 0.16772150993347168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7133529456209768, + "compression_loss": 0.0, + "distillation_loss": 0.18364742398262024, + "epoch": 0.37, + "learning_rate": 4.96536580231915e-05, + "loss": 0.172, + "step": 394, + "task_loss": 0.06767023354768753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7133853088132639, + "compression_loss": 0.0, + "distillation_loss": 0.40755707025527954, + "epoch": 0.38, + "learning_rate": 4.9651888315642815e-05, + "loss": 0.3952, + "step": 395, + "task_loss": 0.2839694619178772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7134176639459838, + "compression_loss": 0.0, + "distillation_loss": 0.5597716569900513, + "epoch": 0.38, + "learning_rate": 4.96501141299599e-05, + "loss": 0.5393, + "step": 396, + "task_loss": 0.35467255115509033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7134500110201403, + "compression_loss": 0.0, + "distillation_loss": 0.44882553815841675, + "epoch": 0.38, + "learning_rate": 4.9648335466465035e-05, + "loss": 0.4253, + "step": 397, + "task_loss": 0.21373212337493896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7134823500367369, + "compression_loss": 0.0, + "distillation_loss": 0.562800407409668, + "epoch": 0.38, + "learning_rate": 4.964655232548133e-05, + "loss": 0.5389, + "step": 398, + "task_loss": 0.3235562741756439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7135146809967773, + "compression_loss": 0.0, + "distillation_loss": 0.43563634157180786, + "epoch": 0.38, + "learning_rate": 4.964476470733269e-05, + "loss": 0.4108, + "step": 399, + "task_loss": 0.18746916949748993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7135470039012652, + "compression_loss": 0.0, + "distillation_loss": 0.5361025333404541, + "epoch": 0.38, + "learning_rate": 4.964297261234385e-05, + "loss": 0.5201, + "step": 400, + "task_loss": 0.3763747215270996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7135793187512043, + "compression_loss": 0.0, + "distillation_loss": 0.40355557203292847, + "epoch": 0.38, + "learning_rate": 4.964117604084036e-05, + "loss": 0.3828, + "step": 401, + "task_loss": 0.19647303223609924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7136116255475984, + "compression_loss": 0.0, + "distillation_loss": 0.4803870916366577, + "epoch": 0.38, + "learning_rate": 4.963937499314857e-05, + "loss": 0.4581, + "step": 402, + "task_loss": 0.25726521015167236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.713643924291451, + "compression_loss": 0.0, + "distillation_loss": 0.6330090761184692, + "epoch": 0.38, + "learning_rate": 4.963756946959564e-05, + "loss": 0.6021, + "step": 403, + "task_loss": 0.3235839009284973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7136762149837658, + "compression_loss": 0.0, + "distillation_loss": 0.40180158615112305, + "epoch": 0.38, + "learning_rate": 4.9635759470509554e-05, + "loss": 0.3963, + "step": 404, + "task_loss": 0.3472459614276886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7137084976255466, + "compression_loss": 0.0, + "distillation_loss": 0.29341554641723633, + "epoch": 0.38, + "learning_rate": 4.9633944996219125e-05, + "loss": 0.2771, + "step": 405, + "task_loss": 0.12987171113491058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.713740772217797, + "compression_loss": 0.0, + "distillation_loss": 0.23439250886440277, + "epoch": 0.39, + "learning_rate": 4.9632126047053954e-05, + "loss": 0.2184, + "step": 406, + "task_loss": 0.07485216110944748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7137730387615207, + "compression_loss": 0.0, + "distillation_loss": 0.2850918769836426, + "epoch": 0.39, + "learning_rate": 4.963030262334445e-05, + "loss": 0.2713, + "step": 407, + "task_loss": 0.14699222147464752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7138052972577213, + "compression_loss": 0.0, + "distillation_loss": 0.392857164144516, + "epoch": 0.39, + "learning_rate": 4.962847472542185e-05, + "loss": 0.3697, + "step": 408, + "task_loss": 0.16155928373336792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7138375477074026, + "compression_loss": 0.0, + "distillation_loss": 0.3292420208454132, + "epoch": 0.39, + "learning_rate": 4.96266423536182e-05, + "loss": 0.3101, + "step": 409, + "task_loss": 0.13783293962478638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7138697901115683, + "compression_loss": 0.0, + "distillation_loss": 0.5401057600975037, + "epoch": 0.39, + "learning_rate": 4.9624805508266375e-05, + "loss": 0.5129, + "step": 410, + "task_loss": 0.26759254932403564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.713902024471222, + "compression_loss": 0.0, + "distillation_loss": 0.4787713289260864, + "epoch": 0.39, + "learning_rate": 4.9622964189700026e-05, + "loss": 0.4574, + "step": 411, + "task_loss": 0.2650720477104187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7139342507873674, + "compression_loss": 0.0, + "distillation_loss": 0.3292551040649414, + "epoch": 0.39, + "learning_rate": 4.962111839825365e-05, + "loss": 0.3079, + "step": 412, + "task_loss": 0.11532945930957794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7139664690610081, + "compression_loss": 0.0, + "distillation_loss": 0.32782095670700073, + "epoch": 0.39, + "learning_rate": 4.961926813426254e-05, + "loss": 0.3213, + "step": 413, + "task_loss": 0.2627405524253845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.713998679293148, + "compression_loss": 0.0, + "distillation_loss": 0.4790850579738617, + "epoch": 0.39, + "learning_rate": 4.9617413398062814e-05, + "loss": 0.4547, + "step": 414, + "task_loss": 0.23543286323547363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7140308814847907, + "compression_loss": 0.0, + "distillation_loss": 0.2713318467140198, + "epoch": 0.39, + "learning_rate": 4.9615554189991374e-05, + "loss": 0.2603, + "step": 415, + "task_loss": 0.16122034192085266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7140630756369397, + "compression_loss": 0.0, + "distillation_loss": 0.32149723172187805, + "epoch": 0.4, + "learning_rate": 4.9613690510385965e-05, + "loss": 0.303, + "step": 416, + "task_loss": 0.13658498227596283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7140952617505988, + "compression_loss": 0.0, + "distillation_loss": 0.22573064267635345, + "epoch": 0.4, + "learning_rate": 4.961182235958515e-05, + "loss": 0.2124, + "step": 417, + "task_loss": 0.09292272478342056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7141274398267717, + "compression_loss": 0.0, + "distillation_loss": 0.7057199478149414, + "epoch": 0.4, + "learning_rate": 4.9609949737928254e-05, + "loss": 0.6774, + "step": 418, + "task_loss": 0.42263519763946533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7141596098664622, + "compression_loss": 0.0, + "distillation_loss": 0.2580406963825226, + "epoch": 0.4, + "learning_rate": 4.9608072645755476e-05, + "loss": 0.2494, + "step": 419, + "task_loss": 0.17207808792591095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7141917718706738, + "compression_loss": 0.0, + "distillation_loss": 0.297884464263916, + "epoch": 0.4, + "learning_rate": 4.960619108340778e-05, + "loss": 0.2873, + "step": 420, + "task_loss": 0.19168856739997864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7142239258404103, + "compression_loss": 0.0, + "distillation_loss": 0.10459847003221512, + "epoch": 0.4, + "learning_rate": 4.9604305051226976e-05, + "loss": 0.0963, + "step": 421, + "task_loss": 0.0215504951775074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7142560717766753, + "compression_loss": 0.0, + "distillation_loss": 0.2625170648097992, + "epoch": 0.4, + "learning_rate": 4.960241454955566e-05, + "loss": 0.2528, + "step": 422, + "task_loss": 0.1657557189464569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7142882096804724, + "compression_loss": 0.0, + "distillation_loss": 0.49204713106155396, + "epoch": 0.4, + "learning_rate": 4.960051957873725e-05, + "loss": 0.4776, + "step": 423, + "task_loss": 0.34725382924079895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7143203395528055, + "compression_loss": 0.0, + "distillation_loss": 0.20165809988975525, + "epoch": 0.4, + "learning_rate": 4.959862013911599e-05, + "loss": 0.1876, + "step": 424, + "task_loss": 0.06141514703631401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7143524613946781, + "compression_loss": 0.0, + "distillation_loss": 0.24638772010803223, + "epoch": 0.4, + "learning_rate": 4.959671623103691e-05, + "loss": 0.2331, + "step": 425, + "task_loss": 0.113340824842453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7143845752070941, + "compression_loss": 0.0, + "distillation_loss": 0.7320840358734131, + "epoch": 0.4, + "learning_rate": 4.959480785484587e-05, + "loss": 0.7126, + "step": 426, + "task_loss": 0.53769451379776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7144166809910568, + "compression_loss": 0.0, + "distillation_loss": 0.3415003716945648, + "epoch": 0.41, + "learning_rate": 4.959289501088953e-05, + "loss": 0.3285, + "step": 427, + "task_loss": 0.21169662475585938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7144487787475703, + "compression_loss": 0.0, + "distillation_loss": 0.3320407271385193, + "epoch": 0.41, + "learning_rate": 4.9590977699515374e-05, + "loss": 0.3184, + "step": 428, + "task_loss": 0.19590842723846436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7144808684776381, + "compression_loss": 0.0, + "distillation_loss": 0.11836206912994385, + "epoch": 0.41, + "learning_rate": 4.958905592107168e-05, + "loss": 0.1116, + "step": 429, + "task_loss": 0.05044740438461304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7145129501822638, + "compression_loss": 0.0, + "distillation_loss": 0.6588773727416992, + "epoch": 0.41, + "learning_rate": 4.958712967590756e-05, + "loss": 0.6266, + "step": 430, + "task_loss": 0.3364851772785187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7145450238624512, + "compression_loss": 0.0, + "distillation_loss": 0.3294253945350647, + "epoch": 0.41, + "learning_rate": 4.9585198964372925e-05, + "loss": 0.3151, + "step": 431, + "task_loss": 0.1865476369857788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7145770895192041, + "compression_loss": 0.0, + "distillation_loss": 0.41862189769744873, + "epoch": 0.41, + "learning_rate": 4.958326378681849e-05, + "loss": 0.3942, + "step": 432, + "task_loss": 0.17390699684619904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7146091471535259, + "compression_loss": 0.0, + "distillation_loss": 0.6988288164138794, + "epoch": 0.41, + "learning_rate": 4.958132414359579e-05, + "loss": 0.6708, + "step": 433, + "task_loss": 0.41860806941986084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7146411967664204, + "compression_loss": 0.0, + "distillation_loss": 0.5082677006721497, + "epoch": 0.41, + "learning_rate": 4.957938003505718e-05, + "loss": 0.4901, + "step": 434, + "task_loss": 0.3265720009803772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7146732383588912, + "compression_loss": 0.0, + "distillation_loss": 0.43779420852661133, + "epoch": 0.41, + "learning_rate": 4.957743146155581e-05, + "loss": 0.4178, + "step": 435, + "task_loss": 0.23794850707054138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7147052719319422, + "compression_loss": 0.0, + "distillation_loss": 0.33255505561828613, + "epoch": 0.41, + "learning_rate": 4.9575478423445655e-05, + "loss": 0.3063, + "step": 436, + "task_loss": 0.07019799202680588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.714737297486577, + "compression_loss": 0.0, + "distillation_loss": 0.18722805380821228, + "epoch": 0.42, + "learning_rate": 4.957352092108148e-05, + "loss": 0.1767, + "step": 437, + "task_loss": 0.08177616447210312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7147693150237991, + "compression_loss": 0.0, + "distillation_loss": 0.44641682505607605, + "epoch": 0.42, + "learning_rate": 4.957155895481889e-05, + "loss": 0.4228, + "step": 438, + "task_loss": 0.2099793255329132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7148013245446124, + "compression_loss": 0.0, + "distillation_loss": 0.6220138072967529, + "epoch": 0.42, + "learning_rate": 4.956959252501426e-05, + "loss": 0.5978, + "step": 439, + "task_loss": 0.3797354996204376 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7148333260500205, + "compression_loss": 0.0, + "distillation_loss": 0.37998977303504944, + "epoch": 0.42, + "learning_rate": 4.956762163202484e-05, + "loss": 0.3565, + "step": 440, + "task_loss": 0.1454060971736908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7148653195410271, + "compression_loss": 0.0, + "distillation_loss": 0.5865918397903442, + "epoch": 0.42, + "learning_rate": 4.956564627620862e-05, + "loss": 0.5546, + "step": 441, + "task_loss": 0.2664667069911957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7148973050186359, + "compression_loss": 0.0, + "distillation_loss": 0.36456751823425293, + "epoch": 0.42, + "learning_rate": 4.956366645792445e-05, + "loss": 0.3419, + "step": 442, + "task_loss": 0.1381196826696396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7149292824838505, + "compression_loss": 0.0, + "distillation_loss": 0.3425045609474182, + "epoch": 0.42, + "learning_rate": 4.956168217753197e-05, + "loss": 0.3388, + "step": 443, + "task_loss": 0.3051426112651825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7149612519376747, + "compression_loss": 0.0, + "distillation_loss": 0.28074485063552856, + "epoch": 0.42, + "learning_rate": 4.955969343539162e-05, + "loss": 0.2671, + "step": 444, + "task_loss": 0.14405837655067444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.714993213381112, + "compression_loss": 0.0, + "distillation_loss": 0.16746632754802704, + "epoch": 0.42, + "learning_rate": 4.955770023186469e-05, + "loss": 0.1645, + "step": 445, + "task_loss": 0.1381833851337433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7150251668151663, + "compression_loss": 0.0, + "distillation_loss": 0.3588885962963104, + "epoch": 0.42, + "learning_rate": 4.9555702567313235e-05, + "loss": 0.3396, + "step": 446, + "task_loss": 0.16621676087379456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7150571122408411, + "compression_loss": 0.0, + "distillation_loss": 0.26380306482315063, + "epoch": 0.42, + "learning_rate": 4.9553700442100146e-05, + "loss": 0.2656, + "step": 447, + "task_loss": 0.28156140446662903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7150890496591402, + "compression_loss": 0.0, + "distillation_loss": 0.5757080912590027, + "epoch": 0.43, + "learning_rate": 4.955169385658912e-05, + "loss": 0.5604, + "step": 448, + "task_loss": 0.42307889461517334 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7151209790710672, + "compression_loss": 0.0, + "distillation_loss": 0.4960484802722931, + "epoch": 0.43, + "learning_rate": 4.954968281114467e-05, + "loss": 0.4804, + "step": 449, + "task_loss": 0.33929193019866943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7151529004776259, + "compression_loss": 0.0, + "distillation_loss": 0.6576845645904541, + "epoch": 0.43, + "learning_rate": 4.9547667306132096e-05, + "loss": 0.6253, + "step": 450, + "task_loss": 0.33397895097732544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7151848138798199, + "compression_loss": 0.0, + "distillation_loss": 0.30960074067115784, + "epoch": 0.43, + "learning_rate": 4.954564734191753e-05, + "loss": 0.2977, + "step": 451, + "task_loss": 0.1903936266899109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7152167192786528, + "compression_loss": 0.0, + "distillation_loss": 0.1117134764790535, + "epoch": 0.43, + "learning_rate": 4.9543622918867926e-05, + "loss": 0.106, + "step": 452, + "task_loss": 0.05442768707871437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7152486166751285, + "compression_loss": 0.0, + "distillation_loss": 0.34328725934028625, + "epoch": 0.43, + "learning_rate": 4.9541594037351e-05, + "loss": 0.3272, + "step": 453, + "task_loss": 0.1819160431623459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7152805060702505, + "compression_loss": 0.0, + "distillation_loss": 0.6085219979286194, + "epoch": 0.43, + "learning_rate": 4.953956069773534e-05, + "loss": 0.5771, + "step": 454, + "task_loss": 0.29387348890304565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7153123874650225, + "compression_loss": 0.0, + "distillation_loss": 0.2967539429664612, + "epoch": 0.43, + "learning_rate": 4.953752290039028e-05, + "loss": 0.2804, + "step": 455, + "task_loss": 0.13335244357585907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7153442608604482, + "compression_loss": 0.0, + "distillation_loss": 0.3782120943069458, + "epoch": 0.43, + "learning_rate": 4.953548064568602e-05, + "loss": 0.3664, + "step": 456, + "task_loss": 0.2596074044704437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7153761262575314, + "compression_loss": 0.0, + "distillation_loss": 0.21655282378196716, + "epoch": 0.43, + "learning_rate": 4.953343393399354e-05, + "loss": 0.2041, + "step": 457, + "task_loss": 0.09245874732732773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7154079836572756, + "compression_loss": 0.0, + "distillation_loss": 0.4280490279197693, + "epoch": 0.43, + "learning_rate": 4.953138276568462e-05, + "loss": 0.4155, + "step": 458, + "task_loss": 0.3028221130371094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7154398330606846, + "compression_loss": 0.0, + "distillation_loss": 0.6100641489028931, + "epoch": 0.44, + "learning_rate": 4.952932714113188e-05, + "loss": 0.5865, + "step": 459, + "task_loss": 0.3739990293979645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7154716744687621, + "compression_loss": 0.0, + "distillation_loss": 0.43047159910202026, + "epoch": 0.44, + "learning_rate": 4.9527267060708734e-05, + "loss": 0.4066, + "step": 460, + "task_loss": 0.1912587583065033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7155035078825116, + "compression_loss": 0.0, + "distillation_loss": 0.5423746109008789, + "epoch": 0.44, + "learning_rate": 4.9525202524789397e-05, + "loss": 0.5169, + "step": 461, + "task_loss": 0.28800442814826965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7155353333029371, + "compression_loss": 0.0, + "distillation_loss": 0.2084915190935135, + "epoch": 0.44, + "learning_rate": 4.952313353374891e-05, + "loss": 0.1945, + "step": 462, + "task_loss": 0.06886687129735947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7155671507310419, + "compression_loss": 0.0, + "distillation_loss": 0.36711233854293823, + "epoch": 0.44, + "learning_rate": 4.952106008796311e-05, + "loss": 0.3632, + "step": 463, + "task_loss": 0.3277187943458557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.71559896016783, + "compression_loss": 0.0, + "distillation_loss": 0.25077369809150696, + "epoch": 0.44, + "learning_rate": 4.9518982187808653e-05, + "loss": 0.2342, + "step": 464, + "task_loss": 0.08512859791517258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7156307616143049, + "compression_loss": 0.0, + "distillation_loss": 0.3569698929786682, + "epoch": 0.44, + "learning_rate": 4.9516899833663e-05, + "loss": 0.3385, + "step": 465, + "task_loss": 0.17190499603748322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7156625550714704, + "compression_loss": 0.0, + "distillation_loss": 0.2773820161819458, + "epoch": 0.44, + "learning_rate": 4.9514813025904413e-05, + "loss": 0.2626, + "step": 466, + "task_loss": 0.12949100136756897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.71569434054033, + "compression_loss": 0.0, + "distillation_loss": 0.4447813630104065, + "epoch": 0.44, + "learning_rate": 4.951272176491197e-05, + "loss": 0.4288, + "step": 467, + "task_loss": 0.28465670347213745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7157261180218876, + "compression_loss": 0.0, + "distillation_loss": 0.41461753845214844, + "epoch": 0.44, + "learning_rate": 4.951062605106557e-05, + "loss": 0.4048, + "step": 468, + "task_loss": 0.3163371682167053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7157578875171468, + "compression_loss": 0.0, + "distillation_loss": 0.559532642364502, + "epoch": 0.45, + "learning_rate": 4.950852588474591e-05, + "loss": 0.5354, + "step": 469, + "task_loss": 0.31830355525016785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7157896490271112, + "compression_loss": 0.0, + "distillation_loss": 0.28237810730934143, + "epoch": 0.45, + "learning_rate": 4.9506421266334475e-05, + "loss": 0.2687, + "step": 470, + "task_loss": 0.14537358283996582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7158214025527846, + "compression_loss": 0.0, + "distillation_loss": 0.2132379114627838, + "epoch": 0.45, + "learning_rate": 4.9504312196213596e-05, + "loss": 0.2078, + "step": 471, + "task_loss": 0.15892483294010162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7158531480951705, + "compression_loss": 0.0, + "distillation_loss": 0.205579936504364, + "epoch": 0.45, + "learning_rate": 4.95021986747664e-05, + "loss": 0.1931, + "step": 472, + "task_loss": 0.08082406967878342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7158848856552729, + "compression_loss": 0.0, + "distillation_loss": 0.31995484232902527, + "epoch": 0.45, + "learning_rate": 4.9500080702376805e-05, + "loss": 0.3075, + "step": 473, + "task_loss": 0.19546376168727875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7159166152340952, + "compression_loss": 0.0, + "distillation_loss": 0.5218741297721863, + "epoch": 0.45, + "learning_rate": 4.949795827942956e-05, + "loss": 0.4946, + "step": 474, + "task_loss": 0.24919168651103973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7159483368326413, + "compression_loss": 0.0, + "distillation_loss": 0.49504172801971436, + "epoch": 0.45, + "learning_rate": 4.9495831406310205e-05, + "loss": 0.4802, + "step": 475, + "task_loss": 0.34659019112586975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7159800504519146, + "compression_loss": 0.0, + "distillation_loss": 0.43946248292922974, + "epoch": 0.45, + "learning_rate": 4.94937000834051e-05, + "loss": 0.4265, + "step": 476, + "task_loss": 0.31031233072280884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.716011756092919, + "compression_loss": 0.0, + "distillation_loss": 0.3730089068412781, + "epoch": 0.45, + "learning_rate": 4.9491564311101426e-05, + "loss": 0.3571, + "step": 477, + "task_loss": 0.2140880525112152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7160434537566581, + "compression_loss": 0.0, + "distillation_loss": 0.22854942083358765, + "epoch": 0.45, + "learning_rate": 4.9489424089787125e-05, + "loss": 0.214, + "step": 478, + "task_loss": 0.08312854915857315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7160751434441356, + "compression_loss": 0.0, + "distillation_loss": 0.6738406419754028, + "epoch": 0.45, + "learning_rate": 4.948727941985101e-05, + "loss": 0.6537, + "step": 479, + "task_loss": 0.4723435044288635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7161068251563553, + "compression_loss": 0.0, + "distillation_loss": 0.44496259093284607, + "epoch": 0.46, + "learning_rate": 4.948513030168265e-05, + "loss": 0.4353, + "step": 480, + "task_loss": 0.34817105531692505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7161384988943207, + "compression_loss": 0.0, + "distillation_loss": 0.5351232290267944, + "epoch": 0.46, + "learning_rate": 4.948297673567245e-05, + "loss": 0.5138, + "step": 481, + "task_loss": 0.3218046724796295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7161701646590355, + "compression_loss": 0.0, + "distillation_loss": 0.5943255424499512, + "epoch": 0.46, + "learning_rate": 4.948081872221161e-05, + "loss": 0.5659, + "step": 482, + "task_loss": 0.31032171845436096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7162018224515034, + "compression_loss": 0.0, + "distillation_loss": 0.482815146446228, + "epoch": 0.46, + "learning_rate": 4.9478656261692155e-05, + "loss": 0.4567, + "step": 483, + "task_loss": 0.22122564911842346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7162334722727283, + "compression_loss": 0.0, + "distillation_loss": 0.695792019367218, + "epoch": 0.46, + "learning_rate": 4.947648935450689e-05, + "loss": 0.6668, + "step": 484, + "task_loss": 0.40558087825775146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7162651141237135, + "compression_loss": 0.0, + "distillation_loss": 0.2511596977710724, + "epoch": 0.46, + "learning_rate": 4.947431800104947e-05, + "loss": 0.2414, + "step": 485, + "task_loss": 0.15338845551013947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7162967480054631, + "compression_loss": 0.0, + "distillation_loss": 0.45596519112586975, + "epoch": 0.46, + "learning_rate": 4.94721422017143e-05, + "loss": 0.4342, + "step": 486, + "task_loss": 0.2383052110671997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7163283739189804, + "compression_loss": 0.0, + "distillation_loss": 0.3888728618621826, + "epoch": 0.46, + "learning_rate": 4.946996195689665e-05, + "loss": 0.3687, + "step": 487, + "task_loss": 0.18734854459762573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7163599918652693, + "compression_loss": 0.0, + "distillation_loss": 0.47673919796943665, + "epoch": 0.46, + "learning_rate": 4.9467777266992555e-05, + "loss": 0.4547, + "step": 488, + "task_loss": 0.2561192512512207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7163916018453333, + "compression_loss": 0.0, + "distillation_loss": 0.3363496959209442, + "epoch": 0.46, + "learning_rate": 4.946558813239888e-05, + "loss": 0.3277, + "step": 489, + "task_loss": 0.24951259791851044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7164232038601764, + "compression_loss": 0.0, + "distillation_loss": 0.3862308859825134, + "epoch": 0.47, + "learning_rate": 4.94633945535133e-05, + "loss": 0.3749, + "step": 490, + "task_loss": 0.2733776569366455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.716454797910802, + "compression_loss": 0.0, + "distillation_loss": 0.2634069323539734, + "epoch": 0.47, + "learning_rate": 4.946119653073428e-05, + "loss": 0.2535, + "step": 491, + "task_loss": 0.16414958238601685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.716486383998214, + "compression_loss": 0.0, + "distillation_loss": 0.37143170833587646, + "epoch": 0.47, + "learning_rate": 4.9458994064461103e-05, + "loss": 0.3589, + "step": 492, + "task_loss": 0.24603454768657684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7165179621234158, + "compression_loss": 0.0, + "distillation_loss": 0.24793553352355957, + "epoch": 0.47, + "learning_rate": 4.945678715509386e-05, + "loss": 0.2349, + "step": 493, + "task_loss": 0.11790718138217926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7165495322874113, + "compression_loss": 0.0, + "distillation_loss": 0.21886222064495087, + "epoch": 0.47, + "learning_rate": 4.9454575803033445e-05, + "loss": 0.2139, + "step": 494, + "task_loss": 0.1692572385072708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7165810944912041, + "compression_loss": 0.0, + "distillation_loss": 0.3721332550048828, + "epoch": 0.47, + "learning_rate": 4.945236000868156e-05, + "loss": 0.3528, + "step": 495, + "task_loss": 0.17849226295948029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7166126487357979, + "compression_loss": 0.0, + "distillation_loss": 0.4568150043487549, + "epoch": 0.47, + "learning_rate": 4.9450139772440715e-05, + "loss": 0.4353, + "step": 496, + "task_loss": 0.24134021997451782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7166441950221963, + "compression_loss": 0.0, + "distillation_loss": 0.20982536673545837, + "epoch": 0.47, + "learning_rate": 4.944791509471423e-05, + "loss": 0.204, + "step": 497, + "task_loss": 0.1519790142774582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7166757333514032, + "compression_loss": 0.0, + "distillation_loss": 0.30614525079727173, + "epoch": 0.47, + "learning_rate": 4.944568597590622e-05, + "loss": 0.288, + "step": 498, + "task_loss": 0.12448100000619888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7167072637244222, + "compression_loss": 0.0, + "distillation_loss": 0.4257122874259949, + "epoch": 0.47, + "learning_rate": 4.944345241642162e-05, + "loss": 0.415, + "step": 499, + "task_loss": 0.3188416361808777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7167387861422567, + "compression_loss": 0.0, + "distillation_loss": 0.23117899894714355, + "epoch": 0.47, + "learning_rate": 4.944121441666617e-05, + "loss": 0.2231, + "step": 500, + "task_loss": 0.15056748688220978 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.8956422018348624, + "eval_loss": 0.38681745529174805, + "eval_runtime": 15.2332, + "eval_samples_per_second": 57.243, + "eval_steps_per_second": 7.155, + "step": 500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7167703006059108, + "compression_loss": 0.0, + "distillation_loss": 0.20744021236896515, + "epoch": 0.48, + "learning_rate": 4.943897197704642e-05, + "loss": 0.1999, + "step": 501, + "task_loss": 0.13241733610630035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.716801807116388, + "compression_loss": 0.0, + "distillation_loss": 0.7784759402275085, + "epoch": 0.48, + "learning_rate": 4.9436725097969696e-05, + "loss": 0.7514, + "step": 502, + "task_loss": 0.5077430605888367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7168333056746918, + "compression_loss": 0.0, + "distillation_loss": 0.5257222652435303, + "epoch": 0.48, + "learning_rate": 4.943447377984418e-05, + "loss": 0.5057, + "step": 503, + "task_loss": 0.32530543208122253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7168647962818263, + "compression_loss": 0.0, + "distillation_loss": 0.13662202656269073, + "epoch": 0.48, + "learning_rate": 4.943221802307882e-05, + "loss": 0.1346, + "step": 504, + "task_loss": 0.11593800038099289 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7168962789387948, + "compression_loss": 0.0, + "distillation_loss": 0.3498018980026245, + "epoch": 0.48, + "learning_rate": 4.942995782808339e-05, + "loss": 0.3305, + "step": 505, + "task_loss": 0.15636515617370605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7169277536466011, + "compression_loss": 0.0, + "distillation_loss": 0.3136255145072937, + "epoch": 0.48, + "learning_rate": 4.9427693195268466e-05, + "loss": 0.3083, + "step": 506, + "task_loss": 0.26005005836486816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7169592204062489, + "compression_loss": 0.0, + "distillation_loss": 0.27342021465301514, + "epoch": 0.48, + "learning_rate": 4.942542412504543e-05, + "loss": 0.2671, + "step": 507, + "task_loss": 0.20997658371925354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.716990679218742, + "compression_loss": 0.0, + "distillation_loss": 0.19145192205905914, + "epoch": 0.48, + "learning_rate": 4.942315061782646e-05, + "loss": 0.1941, + "step": 508, + "task_loss": 0.21748016774654388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.717022130085084, + "compression_loss": 0.0, + "distillation_loss": 0.21470308303833008, + "epoch": 0.48, + "learning_rate": 4.942087267402457e-05, + "loss": 0.2198, + "step": 509, + "task_loss": 0.26588204503059387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7170535730062784, + "compression_loss": 0.0, + "distillation_loss": 0.3804921507835388, + "epoch": 0.48, + "learning_rate": 4.941859029405353e-05, + "loss": 0.3673, + "step": 510, + "task_loss": 0.24898777902126312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7170850079833291, + "compression_loss": 0.0, + "distillation_loss": 0.3862866759300232, + "epoch": 0.49, + "learning_rate": 4.9416303478327974e-05, + "loss": 0.3702, + "step": 511, + "task_loss": 0.22553594410419464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7171164350172397, + "compression_loss": 0.0, + "distillation_loss": 0.10008936375379562, + "epoch": 0.49, + "learning_rate": 4.9414012227263295e-05, + "loss": 0.0956, + "step": 512, + "task_loss": 0.05475004017353058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7171478541090139, + "compression_loss": 0.0, + "distillation_loss": 0.30363941192626953, + "epoch": 0.49, + "learning_rate": 4.941171654127572e-05, + "loss": 0.2881, + "step": 513, + "task_loss": 0.14781013131141663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7171792652596554, + "compression_loss": 0.0, + "distillation_loss": 0.5057163238525391, + "epoch": 0.49, + "learning_rate": 4.9409416420782264e-05, + "loss": 0.4856, + "step": 514, + "task_loss": 0.3049323260784149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7172106684701678, + "compression_loss": 0.0, + "distillation_loss": 0.1923336535692215, + "epoch": 0.49, + "learning_rate": 4.940711186620076e-05, + "loss": 0.1797, + "step": 515, + "task_loss": 0.06598946452140808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7172420637415549, + "compression_loss": 0.0, + "distillation_loss": 0.5293185710906982, + "epoch": 0.49, + "learning_rate": 4.9404802877949843e-05, + "loss": 0.5265, + "step": 516, + "task_loss": 0.5010278820991516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7172734510748203, + "compression_loss": 0.0, + "distillation_loss": 0.24947986006736755, + "epoch": 0.49, + "learning_rate": 4.940248945644894e-05, + "loss": 0.2324, + "step": 517, + "task_loss": 0.07890691608190536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7173048304709678, + "compression_loss": 0.0, + "distillation_loss": 0.27918702363967896, + "epoch": 0.49, + "learning_rate": 4.9400171602118306e-05, + "loss": 0.2666, + "step": 518, + "task_loss": 0.15293952822685242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7173362019310009, + "compression_loss": 0.0, + "distillation_loss": 0.4333661198616028, + "epoch": 0.49, + "learning_rate": 4.939784931537899e-05, + "loss": 0.4191, + "step": 519, + "task_loss": 0.29058921337127686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7173675654559234, + "compression_loss": 0.0, + "distillation_loss": 0.27411600947380066, + "epoch": 0.49, + "learning_rate": 4.9395522596652846e-05, + "loss": 0.2661, + "step": 520, + "task_loss": 0.1940668672323227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.717398921046739, + "compression_loss": 0.0, + "distillation_loss": 0.16018861532211304, + "epoch": 0.49, + "learning_rate": 4.939319144636253e-05, + "loss": 0.1733, + "step": 521, + "task_loss": 0.2914007306098938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7174302687044513, + "compression_loss": 0.0, + "distillation_loss": 0.2684335708618164, + "epoch": 0.5, + "learning_rate": 4.9390855864931504e-05, + "loss": 0.2554, + "step": 522, + "task_loss": 0.13780063390731812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7174616084300639, + "compression_loss": 0.0, + "distillation_loss": 0.20900654792785645, + "epoch": 0.5, + "learning_rate": 4.938851585278405e-05, + "loss": 0.1965, + "step": 523, + "task_loss": 0.08378442376852036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7174929402245807, + "compression_loss": 0.0, + "distillation_loss": 0.2784028649330139, + "epoch": 0.5, + "learning_rate": 4.938617141034523e-05, + "loss": 0.2624, + "step": 524, + "task_loss": 0.11791129410266876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7175242640890053, + "compression_loss": 0.0, + "distillation_loss": 0.12981046736240387, + "epoch": 0.5, + "learning_rate": 4.938382253804094e-05, + "loss": 0.1356, + "step": 525, + "task_loss": 0.18778133392333984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7175555800243414, + "compression_loss": 0.0, + "distillation_loss": 0.4506103992462158, + "epoch": 0.5, + "learning_rate": 4.938146923629784e-05, + "loss": 0.4333, + "step": 526, + "task_loss": 0.2775367796421051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7175868880315925, + "compression_loss": 0.0, + "distillation_loss": 0.3384746313095093, + "epoch": 0.5, + "learning_rate": 4.937911150554343e-05, + "loss": 0.3378, + "step": 527, + "task_loss": 0.3314347267150879 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7176181881117626, + "compression_loss": 0.0, + "distillation_loss": 0.2652072608470917, + "epoch": 0.5, + "learning_rate": 4.9376749346206006e-05, + "loss": 0.2549, + "step": 528, + "task_loss": 0.16171765327453613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7176494802658551, + "compression_loss": 0.0, + "distillation_loss": 0.1128808930516243, + "epoch": 0.5, + "learning_rate": 4.937438275871467e-05, + "loss": 0.1051, + "step": 529, + "task_loss": 0.034815624356269836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7176807644948738, + "compression_loss": 0.0, + "distillation_loss": 0.335213840007782, + "epoch": 0.5, + "learning_rate": 4.9372011743499315e-05, + "loss": 0.3248, + "step": 530, + "task_loss": 0.23120583593845367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7177120407998224, + "compression_loss": 0.0, + "distillation_loss": 0.3072986900806427, + "epoch": 0.5, + "learning_rate": 4.9369636300990645e-05, + "loss": 0.3177, + "step": 531, + "task_loss": 0.41148343682289124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7177433091817046, + "compression_loss": 0.0, + "distillation_loss": 0.3255481421947479, + "epoch": 0.51, + "learning_rate": 4.936725643162018e-05, + "loss": 0.3308, + "step": 532, + "task_loss": 0.37787866592407227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.717774569641524, + "compression_loss": 0.0, + "distillation_loss": 0.8519558906555176, + "epoch": 0.51, + "learning_rate": 4.936487213582023e-05, + "loss": 0.8174, + "step": 533, + "task_loss": 0.5065959095954895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7178058221802843, + "compression_loss": 0.0, + "distillation_loss": 0.20490780472755432, + "epoch": 0.51, + "learning_rate": 4.9362483414023905e-05, + "loss": 0.2188, + "step": 534, + "task_loss": 0.34347814321517944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7178370667989893, + "compression_loss": 0.0, + "distillation_loss": 0.2214491367340088, + "epoch": 0.51, + "learning_rate": 4.936009026666515e-05, + "loss": 0.2179, + "step": 535, + "task_loss": 0.18594780564308167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7178683034986425, + "compression_loss": 0.0, + "distillation_loss": 0.2852451801300049, + "epoch": 0.51, + "learning_rate": 4.935769269417867e-05, + "loss": 0.2706, + "step": 536, + "task_loss": 0.13881553709506989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7178995322802476, + "compression_loss": 0.0, + "distillation_loss": 0.16195335984230042, + "epoch": 0.51, + "learning_rate": 4.935529069700001e-05, + "loss": 0.1512, + "step": 537, + "task_loss": 0.05430576950311661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7179307531448085, + "compression_loss": 0.0, + "distillation_loss": 0.2246689647436142, + "epoch": 0.51, + "learning_rate": 4.935288427556549e-05, + "loss": 0.2136, + "step": 538, + "task_loss": 0.11377856135368347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7179619660933286, + "compression_loss": 0.0, + "distillation_loss": 0.17521658539772034, + "epoch": 0.51, + "learning_rate": 4.935047343031227e-05, + "loss": 0.1673, + "step": 539, + "task_loss": 0.0958407074213028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7179931711268118, + "compression_loss": 0.0, + "distillation_loss": 0.4904990792274475, + "epoch": 0.51, + "learning_rate": 4.934805816167827e-05, + "loss": 0.4757, + "step": 540, + "task_loss": 0.3428148627281189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7180243682462617, + "compression_loss": 0.0, + "distillation_loss": 0.362811416387558, + "epoch": 0.51, + "learning_rate": 4.934563847010224e-05, + "loss": 0.3665, + "step": 541, + "task_loss": 0.39953047037124634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7180555574526819, + "compression_loss": 0.0, + "distillation_loss": 0.2322240173816681, + "epoch": 0.51, + "learning_rate": 4.934321435602374e-05, + "loss": 0.2193, + "step": 542, + "task_loss": 0.10299921035766602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7180867387470763, + "compression_loss": 0.0, + "distillation_loss": 0.15493597090244293, + "epoch": 0.52, + "learning_rate": 4.934078581988311e-05, + "loss": 0.1526, + "step": 543, + "task_loss": 0.13205915689468384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7181179121304484, + "compression_loss": 0.0, + "distillation_loss": 0.4433606266975403, + "epoch": 0.52, + "learning_rate": 4.933835286212151e-05, + "loss": 0.4219, + "step": 544, + "task_loss": 0.22923287749290466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7181490776038018, + "compression_loss": 0.0, + "distillation_loss": 0.3247934579849243, + "epoch": 0.52, + "learning_rate": 4.9335915483180896e-05, + "loss": 0.3249, + "step": 545, + "task_loss": 0.3255178928375244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7181802351681404, + "compression_loss": 0.0, + "distillation_loss": 0.6595593690872192, + "epoch": 0.52, + "learning_rate": 4.9333473683504025e-05, + "loss": 0.6289, + "step": 546, + "task_loss": 0.35308799147605896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7182113848244678, + "compression_loss": 0.0, + "distillation_loss": 0.3609609603881836, + "epoch": 0.52, + "learning_rate": 4.9331027463534484e-05, + "loss": 0.3433, + "step": 547, + "task_loss": 0.18484395742416382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7182425265737876, + "compression_loss": 0.0, + "distillation_loss": 0.4003904163837433, + "epoch": 0.52, + "learning_rate": 4.932857682371661e-05, + "loss": 0.3825, + "step": 548, + "task_loss": 0.22114846110343933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7182736604171036, + "compression_loss": 0.0, + "distillation_loss": 0.3539576828479767, + "epoch": 0.52, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.3378, + "step": 549, + "task_loss": 0.19238103926181793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7183047863554195, + "compression_loss": 0.0, + "distillation_loss": 0.09516200423240662, + "epoch": 0.52, + "learning_rate": 4.932366228631741e-05, + "loss": 0.0975, + "step": 550, + "task_loss": 0.11850506067276001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7183359043897388, + "compression_loss": 0.0, + "distillation_loss": 0.784862220287323, + "epoch": 0.52, + "learning_rate": 4.932119838962882e-05, + "loss": 0.7474, + "step": 551, + "task_loss": 0.41059941053390503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7183670145210653, + "compression_loss": 0.0, + "distillation_loss": 0.3884808421134949, + "epoch": 0.52, + "learning_rate": 4.931873007487741e-05, + "loss": 0.3695, + "step": 552, + "task_loss": 0.19908683001995087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7183981167504028, + "compression_loss": 0.0, + "distillation_loss": 0.1411825716495514, + "epoch": 0.53, + "learning_rate": 4.9316257342511565e-05, + "loss": 0.1397, + "step": 553, + "task_loss": 0.12602877616882324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7184292110787548, + "compression_loss": 0.0, + "distillation_loss": 0.27567747235298157, + "epoch": 0.53, + "learning_rate": 4.9313780192980466e-05, + "loss": 0.259, + "step": 554, + "task_loss": 0.10907714068889618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.718460297507125, + "compression_loss": 0.0, + "distillation_loss": 0.23973673582077026, + "epoch": 0.53, + "learning_rate": 4.9311298626734095e-05, + "loss": 0.2289, + "step": 555, + "task_loss": 0.13170108199119568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7184913760365172, + "compression_loss": 0.0, + "distillation_loss": 0.3157758116722107, + "epoch": 0.53, + "learning_rate": 4.9308812644223245e-05, + "loss": 0.2988, + "step": 556, + "task_loss": 0.14566126465797424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7185224466679351, + "compression_loss": 0.0, + "distillation_loss": 0.19598382711410522, + "epoch": 0.53, + "learning_rate": 4.9306322245899505e-05, + "loss": 0.187, + "step": 557, + "task_loss": 0.10587802529335022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.718553509402382, + "compression_loss": 0.0, + "distillation_loss": 0.26901909708976746, + "epoch": 0.53, + "learning_rate": 4.930382743221528e-05, + "loss": 0.2577, + "step": 558, + "task_loss": 0.1556887924671173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7185845642408621, + "compression_loss": 0.0, + "distillation_loss": 0.4021989703178406, + "epoch": 0.53, + "learning_rate": 4.930132820362374e-05, + "loss": 0.3864, + "step": 559, + "task_loss": 0.24379751086235046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7186156111843789, + "compression_loss": 0.0, + "distillation_loss": 0.4040166735649109, + "epoch": 0.53, + "learning_rate": 4.9298824560578895e-05, + "loss": 0.387, + "step": 560, + "task_loss": 0.2337295562028885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7186466502339359, + "compression_loss": 0.0, + "distillation_loss": 0.5355952978134155, + "epoch": 0.53, + "learning_rate": 4.929631650353555e-05, + "loss": 0.5101, + "step": 561, + "task_loss": 0.28034472465515137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.718677681390537, + "compression_loss": 0.0, + "distillation_loss": 0.22372055053710938, + "epoch": 0.53, + "learning_rate": 4.92938040329493e-05, + "loss": 0.2111, + "step": 562, + "task_loss": 0.09713706374168396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7187087046551858, + "compression_loss": 0.0, + "distillation_loss": 0.5858652591705322, + "epoch": 0.53, + "learning_rate": 4.9291287149276544e-05, + "loss": 0.5583, + "step": 563, + "task_loss": 0.31010231375694275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7187397200288859, + "compression_loss": 0.0, + "distillation_loss": 0.20850016176700592, + "epoch": 0.54, + "learning_rate": 4.928876585297448e-05, + "loss": 0.2016, + "step": 564, + "task_loss": 0.1396273374557495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7187707275126411, + "compression_loss": 0.0, + "distillation_loss": 0.2773027718067169, + "epoch": 0.54, + "learning_rate": 4.9286240144501136e-05, + "loss": 0.262, + "step": 565, + "task_loss": 0.1242566779255867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.718801727107455, + "compression_loss": 0.0, + "distillation_loss": 0.11861973255872726, + "epoch": 0.54, + "learning_rate": 4.928371002431531e-05, + "loss": 0.1102, + "step": 566, + "task_loss": 0.03490396589040756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7188327188143315, + "compression_loss": 0.0, + "distillation_loss": 0.04367408901453018, + "epoch": 0.54, + "learning_rate": 4.92811754928766e-05, + "loss": 0.0404, + "step": 567, + "task_loss": 0.010761696845293045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.718863702634274, + "compression_loss": 0.0, + "distillation_loss": 0.5102982521057129, + "epoch": 0.54, + "learning_rate": 4.927863655064542e-05, + "loss": 0.4859, + "step": 568, + "task_loss": 0.2663833200931549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7188946785682863, + "compression_loss": 0.0, + "distillation_loss": 0.5678519010543823, + "epoch": 0.54, + "learning_rate": 4.9276093198082986e-05, + "loss": 0.5548, + "step": 569, + "task_loss": 0.4374026358127594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7189256466173721, + "compression_loss": 0.0, + "distillation_loss": 0.2537938952445984, + "epoch": 0.54, + "learning_rate": 4.92735454356513e-05, + "loss": 0.2444, + "step": 570, + "task_loss": 0.15972843766212463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.718956606782535, + "compression_loss": 0.0, + "distillation_loss": 0.37457334995269775, + "epoch": 0.54, + "learning_rate": 4.927099326381319e-05, + "loss": 0.3575, + "step": 571, + "task_loss": 0.20346179604530334 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7189875590647787, + "compression_loss": 0.0, + "distillation_loss": 0.4022772014141083, + "epoch": 0.54, + "learning_rate": 4.926843668303227e-05, + "loss": 0.3899, + "step": 572, + "task_loss": 0.27801287174224854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7190185034651071, + "compression_loss": 0.0, + "distillation_loss": 0.21378515660762787, + "epoch": 0.54, + "learning_rate": 4.926587569377293e-05, + "loss": 0.202, + "step": 573, + "task_loss": 0.09593548625707626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7190494399845236, + "compression_loss": 0.0, + "distillation_loss": 0.2173890620470047, + "epoch": 0.55, + "learning_rate": 4.926331029650042e-05, + "loss": 0.2096, + "step": 574, + "task_loss": 0.1397651731967926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.719080368624032, + "compression_loss": 0.0, + "distillation_loss": 0.0907929316163063, + "epoch": 0.55, + "learning_rate": 4.926074049168074e-05, + "loss": 0.0843, + "step": 575, + "task_loss": 0.026197172701358795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.719111289384636, + "compression_loss": 0.0, + "distillation_loss": 0.281998872756958, + "epoch": 0.55, + "learning_rate": 4.9258166279780704e-05, + "loss": 0.2744, + "step": 576, + "task_loss": 0.20600730180740356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7191422022673393, + "compression_loss": 0.0, + "distillation_loss": 0.30183589458465576, + "epoch": 0.55, + "learning_rate": 4.925558766126794e-05, + "loss": 0.2886, + "step": 577, + "task_loss": 0.16912756860256195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7191731072731453, + "compression_loss": 0.0, + "distillation_loss": 0.49650150537490845, + "epoch": 0.55, + "learning_rate": 4.9253004636610856e-05, + "loss": 0.4803, + "step": 578, + "task_loss": 0.33414289355278015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7192040044030581, + "compression_loss": 0.0, + "distillation_loss": 0.39063113927841187, + "epoch": 0.55, + "learning_rate": 4.925041720627868e-05, + "loss": 0.3818, + "step": 579, + "task_loss": 0.301964670419693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7192348936580812, + "compression_loss": 0.0, + "distillation_loss": 0.34917062520980835, + "epoch": 0.55, + "learning_rate": 4.9247825370741416e-05, + "loss": 0.336, + "step": 580, + "task_loss": 0.2170882225036621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7192657750392183, + "compression_loss": 0.0, + "distillation_loss": 0.17031094431877136, + "epoch": 0.55, + "learning_rate": 4.924522913046991e-05, + "loss": 0.1716, + "step": 581, + "task_loss": 0.18353326618671417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.719296648547473, + "compression_loss": 0.0, + "distillation_loss": 0.10322088748216629, + "epoch": 0.55, + "learning_rate": 4.924262848593576e-05, + "loss": 0.0994, + "step": 582, + "task_loss": 0.0652477964758873 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7193275141838491, + "compression_loss": 0.0, + "distillation_loss": 0.11138247698545456, + "epoch": 0.55, + "learning_rate": 4.924002343761139e-05, + "loss": 0.1033, + "step": 583, + "task_loss": 0.030367694795131683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7193583719493502, + "compression_loss": 0.0, + "distillation_loss": 0.8268933892250061, + "epoch": 0.55, + "learning_rate": 4.923741398597002e-05, + "loss": 0.7975, + "step": 584, + "task_loss": 0.5331718921661377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7193892218449799, + "compression_loss": 0.0, + "distillation_loss": 0.535077691078186, + "epoch": 0.56, + "learning_rate": 4.9234800131485675e-05, + "loss": 0.5079, + "step": 585, + "task_loss": 0.263676255941391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7194200638717421, + "compression_loss": 0.0, + "distillation_loss": 0.2574825882911682, + "epoch": 0.56, + "learning_rate": 4.9232181874633164e-05, + "loss": 0.2479, + "step": 586, + "task_loss": 0.16171754896640778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7194508980306403, + "compression_loss": 0.0, + "distillation_loss": 0.28252261877059937, + "epoch": 0.56, + "learning_rate": 4.922955921588812e-05, + "loss": 0.2685, + "step": 587, + "task_loss": 0.14232273399829865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7194817243226783, + "compression_loss": 0.0, + "distillation_loss": 0.5040257573127747, + "epoch": 0.56, + "learning_rate": 4.922693215572695e-05, + "loss": 0.4797, + "step": 588, + "task_loss": 0.2605317533016205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7195125427488598, + "compression_loss": 0.0, + "distillation_loss": 0.2037096619606018, + "epoch": 0.56, + "learning_rate": 4.922430069462688e-05, + "loss": 0.1908, + "step": 589, + "task_loss": 0.07464735209941864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7195433533101884, + "compression_loss": 0.0, + "distillation_loss": 0.21558362245559692, + "epoch": 0.56, + "learning_rate": 4.9221664833065914e-05, + "loss": 0.2088, + "step": 590, + "task_loss": 0.14768168330192566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7195741560076677, + "compression_loss": 0.0, + "distillation_loss": 0.40281903743743896, + "epoch": 0.56, + "learning_rate": 4.921902457152289e-05, + "loss": 0.3805, + "step": 591, + "task_loss": 0.18012678623199463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7196049508423016, + "compression_loss": 0.0, + "distillation_loss": 0.3910696804523468, + "epoch": 0.56, + "learning_rate": 4.9216379910477403e-05, + "loss": 0.3727, + "step": 592, + "task_loss": 0.20783747732639313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7196357378150936, + "compression_loss": 0.0, + "distillation_loss": 0.24493408203125, + "epoch": 0.56, + "learning_rate": 4.921373085040988e-05, + "loss": 0.2339, + "step": 593, + "task_loss": 0.13446438312530518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7196665169270474, + "compression_loss": 0.0, + "distillation_loss": 0.13932454586029053, + "epoch": 0.56, + "learning_rate": 4.921107739180153e-05, + "loss": 0.1317, + "step": 594, + "task_loss": 0.06355321407318115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7196972881791668, + "compression_loss": 0.0, + "distillation_loss": 0.395768940448761, + "epoch": 0.57, + "learning_rate": 4.9208419535134376e-05, + "loss": 0.384, + "step": 595, + "task_loss": 0.2776716947555542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7197280515724555, + "compression_loss": 0.0, + "distillation_loss": 0.24235188961029053, + "epoch": 0.57, + "learning_rate": 4.920575728089122e-05, + "loss": 0.2304, + "step": 596, + "task_loss": 0.1224876269698143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.719758807107917, + "compression_loss": 0.0, + "distillation_loss": 0.11212563514709473, + "epoch": 0.57, + "learning_rate": 4.920309062955568e-05, + "loss": 0.1317, + "step": 597, + "task_loss": 0.3079983592033386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.719789554786555, + "compression_loss": 0.0, + "distillation_loss": 0.3031010627746582, + "epoch": 0.57, + "learning_rate": 4.920041958161217e-05, + "loss": 0.2929, + "step": 598, + "task_loss": 0.20059747993946075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7198202946093734, + "compression_loss": 0.0, + "distillation_loss": 0.6109755039215088, + "epoch": 0.57, + "learning_rate": 4.9197744137545884e-05, + "loss": 0.5924, + "step": 599, + "task_loss": 0.4249090254306793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7198510265773757, + "compression_loss": 0.0, + "distillation_loss": 0.33642804622650146, + "epoch": 0.57, + "learning_rate": 4.919506429784284e-05, + "loss": 0.3205, + "step": 600, + "task_loss": 0.17708787322044373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7198817506915656, + "compression_loss": 0.0, + "distillation_loss": 0.191384956240654, + "epoch": 0.57, + "learning_rate": 4.919238006298984e-05, + "loss": 0.2127, + "step": 601, + "task_loss": 0.40497034788131714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7199124669529468, + "compression_loss": 0.0, + "distillation_loss": 0.19533386826515198, + "epoch": 0.57, + "learning_rate": 4.9189691433474494e-05, + "loss": 0.1888, + "step": 602, + "task_loss": 0.12987524271011353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.719943175362523, + "compression_loss": 0.0, + "distillation_loss": 0.3835797607898712, + "epoch": 0.57, + "learning_rate": 4.91869984097852e-05, + "loss": 0.3607, + "step": 603, + "task_loss": 0.15473908185958862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7199738759212977, + "compression_loss": 0.0, + "distillation_loss": 0.2762410342693329, + "epoch": 0.57, + "learning_rate": 4.918430099241116e-05, + "loss": 0.2624, + "step": 604, + "task_loss": 0.137907475233078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.720004568630275, + "compression_loss": 0.0, + "distillation_loss": 0.11789911985397339, + "epoch": 0.57, + "learning_rate": 4.918159918184236e-05, + "loss": 0.1095, + "step": 605, + "task_loss": 0.03381138667464256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7200352534904582, + "compression_loss": 0.0, + "distillation_loss": 0.26301029324531555, + "epoch": 0.58, + "learning_rate": 4.9178892978569625e-05, + "loss": 0.2488, + "step": 606, + "task_loss": 0.12070365250110626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7200659305028512, + "compression_loss": 0.0, + "distillation_loss": 0.1380884349346161, + "epoch": 0.58, + "learning_rate": 4.9176182383084524e-05, + "loss": 0.1346, + "step": 607, + "task_loss": 0.10351494699716568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7200965996684575, + "compression_loss": 0.0, + "distillation_loss": 0.4840831756591797, + "epoch": 0.58, + "learning_rate": 4.917346739587946e-05, + "loss": 0.4609, + "step": 608, + "task_loss": 0.25223809480667114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.720127260988281, + "compression_loss": 0.0, + "distillation_loss": 0.28585392236709595, + "epoch": 0.58, + "learning_rate": 4.917074801744763e-05, + "loss": 0.2685, + "step": 609, + "task_loss": 0.112055703997612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7201579144633251, + "compression_loss": 0.0, + "distillation_loss": 0.379632830619812, + "epoch": 0.58, + "learning_rate": 4.916802424828301e-05, + "loss": 0.3577, + "step": 610, + "task_loss": 0.15994824469089508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7201885600945938, + "compression_loss": 0.0, + "distillation_loss": 0.3786671757698059, + "epoch": 0.58, + "learning_rate": 4.9165296088880384e-05, + "loss": 0.3794, + "step": 611, + "task_loss": 0.38576364517211914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7202191978830905, + "compression_loss": 0.0, + "distillation_loss": 0.2917826771736145, + "epoch": 0.58, + "learning_rate": 4.916256353973535e-05, + "loss": 0.2759, + "step": 612, + "task_loss": 0.13341909646987915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7202498278298191, + "compression_loss": 0.0, + "distillation_loss": 0.24248436093330383, + "epoch": 0.58, + "learning_rate": 4.9159826601344286e-05, + "loss": 0.2274, + "step": 613, + "task_loss": 0.0919809564948082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7202804499357831, + "compression_loss": 0.0, + "distillation_loss": 0.12847685813903809, + "epoch": 0.58, + "learning_rate": 4.915708527420435e-05, + "loss": 0.1231, + "step": 614, + "task_loss": 0.0744004100561142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7203110642019863, + "compression_loss": 0.0, + "distillation_loss": 0.1666882485151291, + "epoch": 0.58, + "learning_rate": 4.9154339558813546e-05, + "loss": 0.1631, + "step": 615, + "task_loss": 0.13053244352340698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7203416706294323, + "compression_loss": 0.0, + "distillation_loss": 0.7391297817230225, + "epoch": 0.58, + "learning_rate": 4.915158945567062e-05, + "loss": 0.7177, + "step": 616, + "task_loss": 0.5249520540237427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.720372269219125, + "compression_loss": 0.0, + "distillation_loss": 0.18314534425735474, + "epoch": 0.59, + "learning_rate": 4.914883496527516e-05, + "loss": 0.1906, + "step": 617, + "task_loss": 0.2574879825115204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7204028599720679, + "compression_loss": 0.0, + "distillation_loss": 0.38090646266937256, + "epoch": 0.59, + "learning_rate": 4.914607608812753e-05, + "loss": 0.3593, + "step": 618, + "task_loss": 0.1652752161026001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7204334428892646, + "compression_loss": 0.0, + "distillation_loss": 0.1799452304840088, + "epoch": 0.59, + "learning_rate": 4.9143312824728896e-05, + "loss": 0.1885, + "step": 619, + "task_loss": 0.26563724875450134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7204640179717189, + "compression_loss": 0.0, + "distillation_loss": 0.2368253916501999, + "epoch": 0.59, + "learning_rate": 4.91405451755812e-05, + "loss": 0.2235, + "step": 620, + "task_loss": 0.10326235741376877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7204945852204344, + "compression_loss": 0.0, + "distillation_loss": 0.5095282196998596, + "epoch": 0.59, + "learning_rate": 4.913777314118721e-05, + "loss": 0.4846, + "step": 621, + "task_loss": 0.2607031464576721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7205251446364149, + "compression_loss": 0.0, + "distillation_loss": 0.12455812841653824, + "epoch": 0.59, + "learning_rate": 4.9134996722050483e-05, + "loss": 0.1324, + "step": 622, + "task_loss": 0.20319408178329468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7205556962206641, + "compression_loss": 0.0, + "distillation_loss": 0.3534661531448364, + "epoch": 0.59, + "learning_rate": 4.913221591867537e-05, + "loss": 0.3352, + "step": 623, + "task_loss": 0.17124255001544952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7205862399741856, + "compression_loss": 0.0, + "distillation_loss": 0.06430540978908539, + "epoch": 0.59, + "learning_rate": 4.912943073156701e-05, + "loss": 0.0592, + "step": 624, + "task_loss": 0.013450298458337784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.720616775897983, + "compression_loss": 0.0, + "distillation_loss": 0.26247042417526245, + "epoch": 0.59, + "learning_rate": 4.912664116123134e-05, + "loss": 0.2455, + "step": 625, + "task_loss": 0.09270986914634705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7206473039930601, + "compression_loss": 0.0, + "distillation_loss": 0.5087012052536011, + "epoch": 0.59, + "learning_rate": 4.9123847208175126e-05, + "loss": 0.4844, + "step": 626, + "task_loss": 0.2655639946460724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7206778242604206, + "compression_loss": 0.0, + "distillation_loss": 0.2001875340938568, + "epoch": 0.6, + "learning_rate": 4.912104887290587e-05, + "loss": 0.1939, + "step": 627, + "task_loss": 0.13689836859703064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7207083367010682, + "compression_loss": 0.0, + "distillation_loss": 0.19963237643241882, + "epoch": 0.6, + "learning_rate": 4.911824615593193e-05, + "loss": 0.1886, + "step": 628, + "task_loss": 0.08933916687965393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7207388413160064, + "compression_loss": 0.0, + "distillation_loss": 0.4403138756752014, + "epoch": 0.6, + "learning_rate": 4.9115439057762416e-05, + "loss": 0.4213, + "step": 629, + "task_loss": 0.2505990266799927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.720769338106239, + "compression_loss": 0.0, + "distillation_loss": 0.4388275146484375, + "epoch": 0.6, + "learning_rate": 4.911262757890726e-05, + "loss": 0.4296, + "step": 630, + "task_loss": 0.3462795615196228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7207998270727697, + "compression_loss": 0.0, + "distillation_loss": 0.33021849393844604, + "epoch": 0.6, + "learning_rate": 4.9109811719877166e-05, + "loss": 0.3132, + "step": 631, + "task_loss": 0.16027683019638062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7208303082166022, + "compression_loss": 0.0, + "distillation_loss": 0.483683317899704, + "epoch": 0.6, + "learning_rate": 4.910699148118367e-05, + "loss": 0.4708, + "step": 632, + "task_loss": 0.3550935983657837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7208607815387402, + "compression_loss": 0.0, + "distillation_loss": 0.39671921730041504, + "epoch": 0.6, + "learning_rate": 4.910416686333906e-05, + "loss": 0.3752, + "step": 633, + "task_loss": 0.18194958567619324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7208912470401873, + "compression_loss": 0.0, + "distillation_loss": 0.5867143273353577, + "epoch": 0.6, + "learning_rate": 4.910133786685646e-05, + "loss": 0.5733, + "step": 634, + "task_loss": 0.4523051083087921 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7209217047219472, + "compression_loss": 0.0, + "distillation_loss": 0.43349331617355347, + "epoch": 0.6, + "learning_rate": 4.9098504492249764e-05, + "loss": 0.4114, + "step": 635, + "task_loss": 0.2126692235469818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7209521545850235, + "compression_loss": 0.0, + "distillation_loss": 0.35138991475105286, + "epoch": 0.6, + "learning_rate": 4.9095666740033664e-05, + "loss": 0.3348, + "step": 636, + "task_loss": 0.18542218208312988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7209825966304201, + "compression_loss": 0.0, + "distillation_loss": 0.43535393476486206, + "epoch": 0.6, + "learning_rate": 4.9092824610723655e-05, + "loss": 0.4145, + "step": 637, + "task_loss": 0.22690628468990326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7210130308591405, + "compression_loss": 0.0, + "distillation_loss": 0.2631605565547943, + "epoch": 0.61, + "learning_rate": 4.908997810483602e-05, + "loss": 0.2534, + "step": 638, + "task_loss": 0.1657785326242447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7210434572721884, + "compression_loss": 0.0, + "distillation_loss": 0.4615224003791809, + "epoch": 0.61, + "learning_rate": 4.908712722288785e-05, + "loss": 0.4336, + "step": 639, + "task_loss": 0.1823965460062027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7210738758705676, + "compression_loss": 0.0, + "distillation_loss": 0.3323667645454407, + "epoch": 0.61, + "learning_rate": 4.9084271965397014e-05, + "loss": 0.3282, + "step": 640, + "task_loss": 0.290405809879303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7211042866552817, + "compression_loss": 0.0, + "distillation_loss": 0.27497389912605286, + "epoch": 0.61, + "learning_rate": 4.908141233288218e-05, + "loss": 0.2726, + "step": 641, + "task_loss": 0.2515375018119812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7211346896273344, + "compression_loss": 0.0, + "distillation_loss": 0.1021830216050148, + "epoch": 0.61, + "learning_rate": 4.907854832586282e-05, + "loss": 0.094, + "step": 642, + "task_loss": 0.020755015313625336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7211650847877293, + "compression_loss": 0.0, + "distillation_loss": 0.19680382311344147, + "epoch": 0.61, + "learning_rate": 4.907567994485919e-05, + "loss": 0.1992, + "step": 643, + "task_loss": 0.2208843231201172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7211954721374703, + "compression_loss": 0.0, + "distillation_loss": 0.5825210809707642, + "epoch": 0.61, + "learning_rate": 4.9072807190392354e-05, + "loss": 0.5626, + "step": 644, + "task_loss": 0.383215069770813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7212258516775608, + "compression_loss": 0.0, + "distillation_loss": 0.3526345491409302, + "epoch": 0.61, + "learning_rate": 4.906993006298416e-05, + "loss": 0.3389, + "step": 645, + "task_loss": 0.21486197412014008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7212562234090046, + "compression_loss": 0.0, + "distillation_loss": 0.4113069772720337, + "epoch": 0.61, + "learning_rate": 4.9067048563157235e-05, + "loss": 0.4089, + "step": 646, + "task_loss": 0.38709861040115356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7212865873328055, + "compression_loss": 0.0, + "distillation_loss": 0.27968546748161316, + "epoch": 0.61, + "learning_rate": 4.906416269143505e-05, + "loss": 0.2654, + "step": 647, + "task_loss": 0.13687899708747864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.721316943449967, + "compression_loss": 0.0, + "distillation_loss": 0.4654969274997711, + "epoch": 0.62, + "learning_rate": 4.90612724483418e-05, + "loss": 0.4424, + "step": 648, + "task_loss": 0.23491407930850983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7213472917614929, + "compression_loss": 0.0, + "distillation_loss": 0.35083064436912537, + "epoch": 0.62, + "learning_rate": 4.905837783440253e-05, + "loss": 0.3333, + "step": 649, + "task_loss": 0.17527225613594055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7213776322683869, + "compression_loss": 0.0, + "distillation_loss": 0.7668882608413696, + "epoch": 0.62, + "learning_rate": 4.905547885014307e-05, + "loss": 0.7316, + "step": 650, + "task_loss": 0.4138070046901703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7214079649716525, + "compression_loss": 0.0, + "distillation_loss": 0.25231415033340454, + "epoch": 0.62, + "learning_rate": 4.9052575496090016e-05, + "loss": 0.2521, + "step": 651, + "task_loss": 0.2498636394739151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7214382898722936, + "compression_loss": 0.0, + "distillation_loss": 0.39607787132263184, + "epoch": 0.62, + "learning_rate": 4.904966777277079e-05, + "loss": 0.3755, + "step": 652, + "task_loss": 0.19066929817199707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7214686069713138, + "compression_loss": 0.0, + "distillation_loss": 0.33866336941719055, + "epoch": 0.62, + "learning_rate": 4.9046755680713586e-05, + "loss": 0.3344, + "step": 653, + "task_loss": 0.2955394387245178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7214989162697167, + "compression_loss": 0.0, + "distillation_loss": 0.26593947410583496, + "epoch": 0.62, + "learning_rate": 4.90438392204474e-05, + "loss": 0.2494, + "step": 654, + "task_loss": 0.10030423104763031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7215292177685061, + "compression_loss": 0.0, + "distillation_loss": 0.4320967495441437, + "epoch": 0.62, + "learning_rate": 4.9040918392502026e-05, + "loss": 0.4192, + "step": 655, + "task_loss": 0.30289995670318604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7215595114686856, + "compression_loss": 0.0, + "distillation_loss": 0.15407636761665344, + "epoch": 0.62, + "learning_rate": 4.903799319740804e-05, + "loss": 0.145, + "step": 656, + "task_loss": 0.0632171630859375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.721589797371259, + "compression_loss": 0.0, + "distillation_loss": 0.11901634931564331, + "epoch": 0.62, + "learning_rate": 4.903506363569683e-05, + "loss": 0.1128, + "step": 657, + "task_loss": 0.05649835988879204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7216200754772298, + "compression_loss": 0.0, + "distillation_loss": 0.2876704931259155, + "epoch": 0.62, + "learning_rate": 4.9032129707900556e-05, + "loss": 0.283, + "step": 658, + "task_loss": 0.2414519190788269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7216503457876018, + "compression_loss": 0.0, + "distillation_loss": 0.1815006583929062, + "epoch": 0.63, + "learning_rate": 4.9029191414552165e-05, + "loss": 0.1867, + "step": 659, + "task_loss": 0.23326468467712402 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7216806083033788, + "compression_loss": 0.0, + "distillation_loss": 0.28646424412727356, + "epoch": 0.63, + "learning_rate": 4.9026248756185445e-05, + "loss": 0.2784, + "step": 660, + "task_loss": 0.20588558912277222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7217108630255642, + "compression_loss": 0.0, + "distillation_loss": 0.22158020734786987, + "epoch": 0.63, + "learning_rate": 4.902330173333492e-05, + "loss": 0.2171, + "step": 661, + "task_loss": 0.17726564407348633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7217411099551618, + "compression_loss": 0.0, + "distillation_loss": 0.41758403182029724, + "epoch": 0.63, + "learning_rate": 4.9020350346535936e-05, + "loss": 0.3971, + "step": 662, + "task_loss": 0.21297332644462585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7217713490931754, + "compression_loss": 0.0, + "distillation_loss": 0.16590705513954163, + "epoch": 0.63, + "learning_rate": 4.901739459632463e-05, + "loss": 0.1547, + "step": 663, + "task_loss": 0.05394207686185837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7218015804406086, + "compression_loss": 0.0, + "distillation_loss": 0.22256065905094147, + "epoch": 0.63, + "learning_rate": 4.901443448323792e-05, + "loss": 0.2239, + "step": 664, + "task_loss": 0.2363419383764267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.721831803998465, + "compression_loss": 0.0, + "distillation_loss": 0.17048153281211853, + "epoch": 0.63, + "learning_rate": 4.901147000781355e-05, + "loss": 0.1603, + "step": 665, + "task_loss": 0.068704754114151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7218620197677484, + "compression_loss": 0.0, + "distillation_loss": 0.5146390199661255, + "epoch": 0.63, + "learning_rate": 4.9008501170589996e-05, + "loss": 0.4834, + "step": 666, + "task_loss": 0.20194561779499054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7218922277494625, + "compression_loss": 0.0, + "distillation_loss": 0.27669647336006165, + "epoch": 0.63, + "learning_rate": 4.900552797210658e-05, + "loss": 0.2729, + "step": 667, + "task_loss": 0.23846843838691711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7219224279446109, + "compression_loss": 0.0, + "distillation_loss": 0.5004544258117676, + "epoch": 0.63, + "learning_rate": 4.90025504129034e-05, + "loss": 0.4772, + "step": 668, + "task_loss": 0.26774314045906067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7219526203541973, + "compression_loss": 0.0, + "distillation_loss": 0.15412703156471252, + "epoch": 0.64, + "learning_rate": 4.8999568493521345e-05, + "loss": 0.1447, + "step": 669, + "task_loss": 0.0594392754137516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7219828049792254, + "compression_loss": 0.0, + "distillation_loss": 0.34250378608703613, + "epoch": 0.64, + "learning_rate": 4.899658221450208e-05, + "loss": 0.3341, + "step": 670, + "task_loss": 0.2580875754356384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7220129818206988, + "compression_loss": 0.0, + "distillation_loss": 0.334232360124588, + "epoch": 0.64, + "learning_rate": 4.899359157638809e-05, + "loss": 0.3119, + "step": 671, + "task_loss": 0.1106051504611969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7220431508796213, + "compression_loss": 0.0, + "distillation_loss": 0.15714368224143982, + "epoch": 0.64, + "learning_rate": 4.899059657972264e-05, + "loss": 0.1483, + "step": 672, + "task_loss": 0.06857656687498093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7220733121569965, + "compression_loss": 0.0, + "distillation_loss": 0.47153952717781067, + "epoch": 0.64, + "learning_rate": 4.898759722504977e-05, + "loss": 0.4422, + "step": 673, + "task_loss": 0.17770282924175262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7221034656538281, + "compression_loss": 0.0, + "distillation_loss": 0.24304763972759247, + "epoch": 0.64, + "learning_rate": 4.8984593512914356e-05, + "loss": 0.2322, + "step": 674, + "task_loss": 0.1346360594034195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7221336113711199, + "compression_loss": 0.0, + "distillation_loss": 0.20216722786426544, + "epoch": 0.64, + "learning_rate": 4.898158544386201e-05, + "loss": 0.2003, + "step": 675, + "task_loss": 0.18363113701343536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7221637493098754, + "compression_loss": 0.0, + "distillation_loss": 0.3209993243217468, + "epoch": 0.64, + "learning_rate": 4.897857301843917e-05, + "loss": 0.3081, + "step": 676, + "task_loss": 0.19210395216941833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7221938794710984, + "compression_loss": 0.0, + "distillation_loss": 0.38004785776138306, + "epoch": 0.64, + "learning_rate": 4.897555623719306e-05, + "loss": 0.3761, + "step": 677, + "task_loss": 0.34037119150161743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7222240018557925, + "compression_loss": 0.0, + "distillation_loss": 0.27278655767440796, + "epoch": 0.64, + "learning_rate": 4.897253510067169e-05, + "loss": 0.2581, + "step": 678, + "task_loss": 0.12568269670009613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7222541164649614, + "compression_loss": 0.0, + "distillation_loss": 0.48017337918281555, + "epoch": 0.64, + "learning_rate": 4.896950960942387e-05, + "loss": 0.4528, + "step": 679, + "task_loss": 0.20639115571975708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7222842232996088, + "compression_loss": 0.0, + "distillation_loss": 0.12991678714752197, + "epoch": 0.65, + "learning_rate": 4.896647976399919e-05, + "loss": 0.1368, + "step": 680, + "task_loss": 0.19825144112110138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7223143223607386, + "compression_loss": 0.0, + "distillation_loss": 0.2807837724685669, + "epoch": 0.65, + "learning_rate": 4.896344556494804e-05, + "loss": 0.2644, + "step": 681, + "task_loss": 0.11680983006954193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.722344413649354, + "compression_loss": 0.0, + "distillation_loss": 0.36504942178726196, + "epoch": 0.65, + "learning_rate": 4.8960407012821584e-05, + "loss": 0.3483, + "step": 682, + "task_loss": 0.19777251780033112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7223744971664591, + "compression_loss": 0.0, + "distillation_loss": 0.24195873737335205, + "epoch": 0.65, + "learning_rate": 4.895736410817181e-05, + "loss": 0.2359, + "step": 683, + "task_loss": 0.1811816245317459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7224045729130574, + "compression_loss": 0.0, + "distillation_loss": 0.17951583862304688, + "epoch": 0.65, + "learning_rate": 4.8954316851551465e-05, + "loss": 0.1744, + "step": 684, + "task_loss": 0.12814725935459137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7224346408901526, + "compression_loss": 0.0, + "distillation_loss": 0.3897627592086792, + "epoch": 0.65, + "learning_rate": 4.895126524351409e-05, + "loss": 0.3718, + "step": 685, + "task_loss": 0.20980247855186462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7224647010987484, + "compression_loss": 0.0, + "distillation_loss": 0.15180069208145142, + "epoch": 0.65, + "learning_rate": 4.8948209284614046e-05, + "loss": 0.144, + "step": 686, + "task_loss": 0.073697030544281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7224947535398485, + "compression_loss": 0.0, + "distillation_loss": 0.3309364914894104, + "epoch": 0.65, + "learning_rate": 4.894514897540643e-05, + "loss": 0.3178, + "step": 687, + "task_loss": 0.1999032199382782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7225247982144566, + "compression_loss": 0.0, + "distillation_loss": 0.5048620700836182, + "epoch": 0.65, + "learning_rate": 4.89420843164472e-05, + "loss": 0.475, + "step": 688, + "task_loss": 0.20606492459774017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7225548351235763, + "compression_loss": 0.0, + "distillation_loss": 0.26741254329681396, + "epoch": 0.65, + "learning_rate": 4.893901530829304e-05, + "loss": 0.2534, + "step": 689, + "task_loss": 0.12686991691589355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7225848642682113, + "compression_loss": 0.0, + "distillation_loss": 0.454744428396225, + "epoch": 0.66, + "learning_rate": 4.8935941951501463e-05, + "loss": 0.4328, + "step": 690, + "task_loss": 0.2351890504360199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7226148856493654, + "compression_loss": 0.0, + "distillation_loss": 0.4148391783237457, + "epoch": 0.66, + "learning_rate": 4.893286424663075e-05, + "loss": 0.391, + "step": 691, + "task_loss": 0.17629502713680267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7226448992680421, + "compression_loss": 0.0, + "distillation_loss": 0.3470875322818756, + "epoch": 0.66, + "learning_rate": 4.892978219423998e-05, + "loss": 0.3301, + "step": 692, + "task_loss": 0.17764955759048462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7226749051252452, + "compression_loss": 0.0, + "distillation_loss": 0.31869742274284363, + "epoch": 0.66, + "learning_rate": 4.892669579488903e-05, + "loss": 0.3036, + "step": 693, + "task_loss": 0.16786476969718933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7227049032219783, + "compression_loss": 0.0, + "distillation_loss": 0.24134713411331177, + "epoch": 0.66, + "learning_rate": 4.892360504913856e-05, + "loss": 0.2292, + "step": 694, + "task_loss": 0.12003158777952194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7227348935592453, + "compression_loss": 0.0, + "distillation_loss": 0.19437089562416077, + "epoch": 0.66, + "learning_rate": 4.8920509957550016e-05, + "loss": 0.1999, + "step": 695, + "task_loss": 0.24951303005218506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7227648761380496, + "compression_loss": 0.0, + "distillation_loss": 0.2377103865146637, + "epoch": 0.66, + "learning_rate": 4.8917410520685635e-05, + "loss": 0.2238, + "step": 696, + "task_loss": 0.09895863384008408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7227948509593951, + "compression_loss": 0.0, + "distillation_loss": 0.09030158072710037, + "epoch": 0.66, + "learning_rate": 4.891430673910844e-05, + "loss": 0.0912, + "step": 697, + "task_loss": 0.09956920146942139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7228248180242853, + "compression_loss": 0.0, + "distillation_loss": 0.20243610441684723, + "epoch": 0.66, + "learning_rate": 4.891119861338226e-05, + "loss": 0.1953, + "step": 698, + "task_loss": 0.13071851432323456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7228547773337239, + "compression_loss": 0.0, + "distillation_loss": 0.1336769163608551, + "epoch": 0.66, + "learning_rate": 4.8908086144071694e-05, + "loss": 0.1241, + "step": 699, + "task_loss": 0.03775034099817276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7228847288887148, + "compression_loss": 0.0, + "distillation_loss": 0.16907915472984314, + "epoch": 0.66, + "learning_rate": 4.8904969331742136e-05, + "loss": 0.1675, + "step": 700, + "task_loss": 0.15347930788993835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7229146726902614, + "compression_loss": 0.0, + "distillation_loss": 0.15456578135490417, + "epoch": 0.67, + "learning_rate": 4.890184817695976e-05, + "loss": 0.1522, + "step": 701, + "task_loss": 0.13044598698616028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7229446087393676, + "compression_loss": 0.0, + "distillation_loss": 0.18475860357284546, + "epoch": 0.67, + "learning_rate": 4.8898722680291564e-05, + "loss": 0.1854, + "step": 702, + "task_loss": 0.19159327447414398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7229745370370371, + "compression_loss": 0.0, + "distillation_loss": 0.3150096535682678, + "epoch": 0.67, + "learning_rate": 4.8895592842305295e-05, + "loss": 0.3036, + "step": 703, + "task_loss": 0.20053307712078094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7230044575842732, + "compression_loss": 0.0, + "distillation_loss": 0.3283127546310425, + "epoch": 0.67, + "learning_rate": 4.88924586635695e-05, + "loss": 0.322, + "step": 704, + "task_loss": 0.2651616036891937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7230343703820801, + "compression_loss": 0.0, + "distillation_loss": 0.19642552733421326, + "epoch": 0.67, + "learning_rate": 4.888932014465352e-05, + "loss": 0.1945, + "step": 705, + "task_loss": 0.1771748960018158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7230642754314611, + "compression_loss": 0.0, + "distillation_loss": 0.23262354731559753, + "epoch": 0.67, + "learning_rate": 4.888617728612749e-05, + "loss": 0.2279, + "step": 706, + "task_loss": 0.18583974242210388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7230941727334201, + "compression_loss": 0.0, + "distillation_loss": 0.329001247882843, + "epoch": 0.67, + "learning_rate": 4.888303008856231e-05, + "loss": 0.3181, + "step": 707, + "task_loss": 0.21996405720710754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7231240622889608, + "compression_loss": 0.0, + "distillation_loss": 0.5461586713790894, + "epoch": 0.67, + "learning_rate": 4.88798785525297e-05, + "loss": 0.5225, + "step": 708, + "task_loss": 0.3095916509628296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7231539440990866, + "compression_loss": 0.0, + "distillation_loss": 0.29736411571502686, + "epoch": 0.67, + "learning_rate": 4.887672267860214e-05, + "loss": 0.2917, + "step": 709, + "task_loss": 0.2409740537405014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7231838181648014, + "compression_loss": 0.0, + "distillation_loss": 0.33324652910232544, + "epoch": 0.67, + "learning_rate": 4.887356246735292e-05, + "loss": 0.3262, + "step": 710, + "task_loss": 0.2625032365322113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7232136844871089, + "compression_loss": 0.0, + "distillation_loss": 0.3226258158683777, + "epoch": 0.68, + "learning_rate": 4.8870397919356094e-05, + "loss": 0.3109, + "step": 711, + "task_loss": 0.20536869764328003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7232435430670128, + "compression_loss": 0.0, + "distillation_loss": 0.3054695427417755, + "epoch": 0.68, + "learning_rate": 4.8867229035186526e-05, + "loss": 0.3009, + "step": 712, + "task_loss": 0.25971150398254395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7232733939055166, + "compression_loss": 0.0, + "distillation_loss": 0.2144642472267151, + "epoch": 0.68, + "learning_rate": 4.886405581541986e-05, + "loss": 0.2068, + "step": 713, + "task_loss": 0.13790678977966309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7233032370036241, + "compression_loss": 0.0, + "distillation_loss": 0.193600594997406, + "epoch": 0.68, + "learning_rate": 4.886087826063252e-05, + "loss": 0.1853, + "step": 714, + "task_loss": 0.11010687053203583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.723333072362339, + "compression_loss": 0.0, + "distillation_loss": 0.3077680170536041, + "epoch": 0.68, + "learning_rate": 4.8857696371401735e-05, + "loss": 0.2948, + "step": 715, + "task_loss": 0.1778283566236496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.723362899982665, + "compression_loss": 0.0, + "distillation_loss": 0.24543452262878418, + "epoch": 0.68, + "learning_rate": 4.88545101483055e-05, + "loss": 0.2343, + "step": 716, + "task_loss": 0.13451889157295227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7233927198656058, + "compression_loss": 0.0, + "distillation_loss": 0.4300387501716614, + "epoch": 0.68, + "learning_rate": 4.885131959192262e-05, + "loss": 0.412, + "step": 717, + "task_loss": 0.24990172684192657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7234225320121649, + "compression_loss": 0.0, + "distillation_loss": 0.368339866399765, + "epoch": 0.68, + "learning_rate": 4.884812470283265e-05, + "loss": 0.3664, + "step": 718, + "task_loss": 0.348560631275177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7234523364233462, + "compression_loss": 0.0, + "distillation_loss": 0.18903806805610657, + "epoch": 0.68, + "learning_rate": 4.884492548161599e-05, + "loss": 0.186, + "step": 719, + "task_loss": 0.1587422788143158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7234821331001532, + "compression_loss": 0.0, + "distillation_loss": 0.2814609110355377, + "epoch": 0.68, + "learning_rate": 4.8841721928853776e-05, + "loss": 0.2723, + "step": 720, + "task_loss": 0.19001665711402893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7235119220435898, + "compression_loss": 0.0, + "distillation_loss": 0.4020082950592041, + "epoch": 0.68, + "learning_rate": 4.8838514045127945e-05, + "loss": 0.385, + "step": 721, + "task_loss": 0.23192302882671356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7235417032546595, + "compression_loss": 0.0, + "distillation_loss": 0.3017335832118988, + "epoch": 0.69, + "learning_rate": 4.883530183102123e-05, + "loss": 0.29, + "step": 722, + "task_loss": 0.1840285062789917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.723571476734366, + "compression_loss": 0.0, + "distillation_loss": 0.27277618646621704, + "epoch": 0.69, + "learning_rate": 4.883208528711715e-05, + "loss": 0.2582, + "step": 723, + "task_loss": 0.1269429624080658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.723601242483713, + "compression_loss": 0.0, + "distillation_loss": 0.2977296710014343, + "epoch": 0.69, + "learning_rate": 4.8828864413999995e-05, + "loss": 0.2751, + "step": 724, + "task_loss": 0.07180985063314438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7236310005037043, + "compression_loss": 0.0, + "distillation_loss": 0.08697059750556946, + "epoch": 0.69, + "learning_rate": 4.8825639212254865e-05, + "loss": 0.0934, + "step": 725, + "task_loss": 0.15161065757274628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7236607507953434, + "compression_loss": 0.0, + "distillation_loss": 0.190194234251976, + "epoch": 0.69, + "learning_rate": 4.882240968246762e-05, + "loss": 0.1799, + "step": 726, + "task_loss": 0.087301105260849 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7236904933596341, + "compression_loss": 0.0, + "distillation_loss": 0.22055265307426453, + "epoch": 0.69, + "learning_rate": 4.8819175825224925e-05, + "loss": 0.2109, + "step": 727, + "task_loss": 0.12390641123056412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7237202281975801, + "compression_loss": 0.0, + "distillation_loss": 0.18130475282669067, + "epoch": 0.69, + "learning_rate": 4.881593764111424e-05, + "loss": 0.1789, + "step": 728, + "task_loss": 0.15703324973583221 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7237499553101849, + "compression_loss": 0.0, + "distillation_loss": 0.21481934189796448, + "epoch": 0.69, + "learning_rate": 4.8812695130723775e-05, + "loss": 0.2068, + "step": 729, + "task_loss": 0.13483870029449463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7237796746984524, + "compression_loss": 0.0, + "distillation_loss": 0.2288239449262619, + "epoch": 0.69, + "learning_rate": 4.880944829464256e-05, + "loss": 0.2152, + "step": 730, + "task_loss": 0.09266623109579086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7238093863633862, + "compression_loss": 0.0, + "distillation_loss": 0.43135643005371094, + "epoch": 0.69, + "learning_rate": 4.880619713346039e-05, + "loss": 0.4258, + "step": 731, + "task_loss": 0.37580642104148865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7238390903059899, + "compression_loss": 0.0, + "distillation_loss": 0.5182836055755615, + "epoch": 0.7, + "learning_rate": 4.8802941647767856e-05, + "loss": 0.4974, + "step": 732, + "task_loss": 0.3091394901275635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7238687865272674, + "compression_loss": 0.0, + "distillation_loss": 0.33886805176734924, + "epoch": 0.7, + "learning_rate": 4.879968183815634e-05, + "loss": 0.3219, + "step": 733, + "task_loss": 0.16887077689170837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7238984750282221, + "compression_loss": 0.0, + "distillation_loss": 0.16470275819301605, + "epoch": 0.7, + "learning_rate": 4.8796417705217994e-05, + "loss": 0.1544, + "step": 734, + "task_loss": 0.06122714653611183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7239281558098579, + "compression_loss": 0.0, + "distillation_loss": 0.3576793074607849, + "epoch": 0.7, + "learning_rate": 4.879314924954577e-05, + "loss": 0.3392, + "step": 735, + "task_loss": 0.1729682832956314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7239578288731784, + "compression_loss": 0.0, + "distillation_loss": 0.1777103692293167, + "epoch": 0.7, + "learning_rate": 4.87898764717334e-05, + "loss": 0.1658, + "step": 736, + "task_loss": 0.0585070438683033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7239874942191873, + "compression_loss": 0.0, + "distillation_loss": 0.21319881081581116, + "epoch": 0.7, + "learning_rate": 4.8786599372375384e-05, + "loss": 0.2003, + "step": 737, + "task_loss": 0.08430507779121399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7240171518488882, + "compression_loss": 0.0, + "distillation_loss": 0.4297768473625183, + "epoch": 0.7, + "learning_rate": 4.878331795206705e-05, + "loss": 0.4102, + "step": 738, + "task_loss": 0.23426848649978638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7240468017632848, + "compression_loss": 0.0, + "distillation_loss": 0.44237983226776123, + "epoch": 0.7, + "learning_rate": 4.878003221140446e-05, + "loss": 0.4247, + "step": 739, + "task_loss": 0.2660304307937622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.724076443963381, + "compression_loss": 0.0, + "distillation_loss": 0.47253918647766113, + "epoch": 0.7, + "learning_rate": 4.877674215098449e-05, + "loss": 0.4542, + "step": 740, + "task_loss": 0.28894340991973877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7241060784501803, + "compression_loss": 0.0, + "distillation_loss": 0.3578449487686157, + "epoch": 0.7, + "learning_rate": 4.87734477714048e-05, + "loss": 0.3449, + "step": 741, + "task_loss": 0.22871339321136475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7241357052246863, + "compression_loss": 0.0, + "distillation_loss": 0.41139233112335205, + "epoch": 0.7, + "learning_rate": 4.8770149073263833e-05, + "loss": 0.3942, + "step": 742, + "task_loss": 0.23949165642261505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7241653242879028, + "compression_loss": 0.0, + "distillation_loss": 0.24485422670841217, + "epoch": 0.71, + "learning_rate": 4.87668460571608e-05, + "loss": 0.2349, + "step": 743, + "task_loss": 0.1458071619272232 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7241949356408335, + "compression_loss": 0.0, + "distillation_loss": 0.2777606248855591, + "epoch": 0.71, + "learning_rate": 4.8763538723695726e-05, + "loss": 0.2744, + "step": 744, + "task_loss": 0.2439776211977005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7242245392844819, + "compression_loss": 0.0, + "distillation_loss": 0.4678876996040344, + "epoch": 0.71, + "learning_rate": 4.87602270734694e-05, + "loss": 0.444, + "step": 745, + "task_loss": 0.22880345582962036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7242541352198519, + "compression_loss": 0.0, + "distillation_loss": 0.5279219150543213, + "epoch": 0.71, + "learning_rate": 4.8756911107083387e-05, + "loss": 0.5044, + "step": 746, + "task_loss": 0.2927531599998474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7242837234479472, + "compression_loss": 0.0, + "distillation_loss": 0.35156458616256714, + "epoch": 0.71, + "learning_rate": 4.875359082514006e-05, + "loss": 0.3392, + "step": 747, + "task_loss": 0.2275211364030838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7243133039697713, + "compression_loss": 0.0, + "distillation_loss": 0.18168729543685913, + "epoch": 0.71, + "learning_rate": 4.8750266228242555e-05, + "loss": 0.1755, + "step": 748, + "task_loss": 0.11939448118209839 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7243428767863281, + "compression_loss": 0.0, + "distillation_loss": 0.3357431888580322, + "epoch": 0.71, + "learning_rate": 4.874693731699481e-05, + "loss": 0.3205, + "step": 749, + "task_loss": 0.18339495360851288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.724372441898621, + "compression_loss": 0.0, + "distillation_loss": 0.2332255244255066, + "epoch": 0.71, + "learning_rate": 4.8743604092001544e-05, + "loss": 0.2189, + "step": 750, + "task_loss": 0.08988796174526215 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.8990825688073395, + "eval_loss": 0.3525860905647278, + "eval_runtime": 18.2739, + "eval_samples_per_second": 47.718, + "eval_steps_per_second": 5.965, + "step": 750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.724401999307654, + "compression_loss": 0.0, + "distillation_loss": 0.23790621757507324, + "epoch": 0.71, + "learning_rate": 4.8740266553868236e-05, + "loss": 0.2297, + "step": 751, + "task_loss": 0.15616914629936218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7244315490144305, + "compression_loss": 0.0, + "distillation_loss": 0.1910121589899063, + "epoch": 0.71, + "learning_rate": 4.873692470320117e-05, + "loss": 0.1767, + "step": 752, + "task_loss": 0.04741794615983963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7244610910199543, + "compression_loss": 0.0, + "distillation_loss": 0.6017439961433411, + "epoch": 0.72, + "learning_rate": 4.8733578540607425e-05, + "loss": 0.587, + "step": 753, + "task_loss": 0.45440027117729187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7244906253252291, + "compression_loss": 0.0, + "distillation_loss": 0.1898229718208313, + "epoch": 0.72, + "learning_rate": 4.8730228066694825e-05, + "loss": 0.1778, + "step": 754, + "task_loss": 0.06922735273838043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7245201519312585, + "compression_loss": 0.0, + "distillation_loss": 0.1818356066942215, + "epoch": 0.72, + "learning_rate": 4.872687328207202e-05, + "loss": 0.1712, + "step": 755, + "task_loss": 0.07538501918315887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7245496708390464, + "compression_loss": 0.0, + "distillation_loss": 0.3183574080467224, + "epoch": 0.72, + "learning_rate": 4.872351418734841e-05, + "loss": 0.3017, + "step": 756, + "task_loss": 0.15180720388889313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7245791820495963, + "compression_loss": 0.0, + "distillation_loss": 0.35951346158981323, + "epoch": 0.72, + "learning_rate": 4.8720150783134196e-05, + "loss": 0.3478, + "step": 757, + "task_loss": 0.2423841804265976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7246086855639118, + "compression_loss": 0.0, + "distillation_loss": 0.18601301312446594, + "epoch": 0.72, + "learning_rate": 4.871678307004035e-05, + "loss": 0.1736, + "step": 758, + "task_loss": 0.06224619597196579 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7246381813829967, + "compression_loss": 0.0, + "distillation_loss": 0.3574899733066559, + "epoch": 0.72, + "learning_rate": 4.8713411048678635e-05, + "loss": 0.3498, + "step": 759, + "task_loss": 0.280869722366333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7246676695078548, + "compression_loss": 0.0, + "distillation_loss": 0.4299178719520569, + "epoch": 0.72, + "learning_rate": 4.8710034719661614e-05, + "loss": 0.4142, + "step": 760, + "task_loss": 0.2722673714160919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7246971499394895, + "compression_loss": 0.0, + "distillation_loss": 0.2085331380367279, + "epoch": 0.72, + "learning_rate": 4.870665408360258e-05, + "loss": 0.1979, + "step": 761, + "task_loss": 0.10241978615522385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7247266226789048, + "compression_loss": 0.0, + "distillation_loss": 0.44271695613861084, + "epoch": 0.72, + "learning_rate": 4.870326914111567e-05, + "loss": 0.424, + "step": 762, + "task_loss": 0.2551548182964325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7247560877271041, + "compression_loss": 0.0, + "distillation_loss": 0.36846601963043213, + "epoch": 0.72, + "learning_rate": 4.8699879892815756e-05, + "loss": 0.3636, + "step": 763, + "task_loss": 0.31974709033966064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7247855450850912, + "compression_loss": 0.0, + "distillation_loss": 0.3941296935081482, + "epoch": 0.73, + "learning_rate": 4.8696486339318524e-05, + "loss": 0.3761, + "step": 764, + "task_loss": 0.21385695040225983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7248149947538698, + "compression_loss": 0.0, + "distillation_loss": 0.26239490509033203, + "epoch": 0.73, + "learning_rate": 4.8693088481240424e-05, + "loss": 0.2546, + "step": 765, + "task_loss": 0.18409638106822968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7248444367344437, + "compression_loss": 0.0, + "distillation_loss": 0.1657324880361557, + "epoch": 0.73, + "learning_rate": 4.86896863191987e-05, + "loss": 0.156, + "step": 766, + "task_loss": 0.06812982261180878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7248738710278163, + "compression_loss": 0.0, + "distillation_loss": 0.22085291147232056, + "epoch": 0.73, + "learning_rate": 4.8686279853811356e-05, + "loss": 0.2073, + "step": 767, + "task_loss": 0.08557181805372238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7249032976349914, + "compression_loss": 0.0, + "distillation_loss": 0.06682948768138885, + "epoch": 0.73, + "learning_rate": 4.8682869085697206e-05, + "loss": 0.0704, + "step": 768, + "task_loss": 0.10302991420030594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7249327165569729, + "compression_loss": 0.0, + "distillation_loss": 0.22944039106369019, + "epoch": 0.73, + "learning_rate": 4.8679454015475835e-05, + "loss": 0.2132, + "step": 769, + "task_loss": 0.06714411824941635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7249621277947641, + "compression_loss": 0.0, + "distillation_loss": 0.5438629388809204, + "epoch": 0.73, + "learning_rate": 4.867603464376759e-05, + "loss": 0.5187, + "step": 770, + "task_loss": 0.291761577129364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.724991531349369, + "compression_loss": 0.0, + "distillation_loss": 0.13500595092773438, + "epoch": 0.73, + "learning_rate": 4.867261097119363e-05, + "loss": 0.1268, + "step": 771, + "task_loss": 0.0527961365878582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7250209272217911, + "compression_loss": 0.0, + "distillation_loss": 0.2532868981361389, + "epoch": 0.73, + "learning_rate": 4.8669182998375884e-05, + "loss": 0.2471, + "step": 772, + "task_loss": 0.19150203466415405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7250503154130341, + "compression_loss": 0.0, + "distillation_loss": 0.1466882824897766, + "epoch": 0.73, + "learning_rate": 4.8665750725937045e-05, + "loss": 0.138, + "step": 773, + "task_loss": 0.059596575796604156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7250796959241018, + "compression_loss": 0.0, + "distillation_loss": 0.19294121861457825, + "epoch": 0.74, + "learning_rate": 4.866231415450062e-05, + "loss": 0.1905, + "step": 774, + "task_loss": 0.16841061413288116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7251090687559978, + "compression_loss": 0.0, + "distillation_loss": 0.21601201593875885, + "epoch": 0.74, + "learning_rate": 4.8658873284690866e-05, + "loss": 0.2109, + "step": 775, + "task_loss": 0.16467759013175964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7251384339097258, + "compression_loss": 0.0, + "distillation_loss": 0.2825254797935486, + "epoch": 0.74, + "learning_rate": 4.865542811713284e-05, + "loss": 0.283, + "step": 776, + "task_loss": 0.28761669993400574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7251677913862895, + "compression_loss": 0.0, + "distillation_loss": 0.17407667636871338, + "epoch": 0.74, + "learning_rate": 4.865197865245237e-05, + "loss": 0.1644, + "step": 777, + "task_loss": 0.07734289765357971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7251971411866924, + "compression_loss": 0.0, + "distillation_loss": 0.1487307846546173, + "epoch": 0.74, + "learning_rate": 4.8648524891276066e-05, + "loss": 0.1402, + "step": 778, + "task_loss": 0.06377163529396057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7252264833119385, + "compression_loss": 0.0, + "distillation_loss": 0.6334168910980225, + "epoch": 0.74, + "learning_rate": 4.8645066834231325e-05, + "loss": 0.616, + "step": 779, + "task_loss": 0.4591747522354126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7252558177630313, + "compression_loss": 0.0, + "distillation_loss": 0.3129650950431824, + "epoch": 0.74, + "learning_rate": 4.8641604481946314e-05, + "loss": 0.2944, + "step": 780, + "task_loss": 0.12727110087871552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7252851445409744, + "compression_loss": 0.0, + "distillation_loss": 0.5313491821289062, + "epoch": 0.74, + "learning_rate": 4.863813783504999e-05, + "loss": 0.5238, + "step": 781, + "task_loss": 0.4554893374443054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7253144636467717, + "compression_loss": 0.0, + "distillation_loss": 0.16231770813465118, + "epoch": 0.74, + "learning_rate": 4.863466689417209e-05, + "loss": 0.1523, + "step": 782, + "task_loss": 0.062042634934186935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7253437750814267, + "compression_loss": 0.0, + "distillation_loss": 0.20974399149417877, + "epoch": 0.74, + "learning_rate": 4.863119165994312e-05, + "loss": 0.2063, + "step": 783, + "task_loss": 0.1748623102903366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7253730788459432, + "compression_loss": 0.0, + "distillation_loss": 0.2711687684059143, + "epoch": 0.74, + "learning_rate": 4.862771213299438e-05, + "loss": 0.2706, + "step": 784, + "task_loss": 0.26576724648475647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7254023749413246, + "compression_loss": 0.0, + "distillation_loss": 0.2579856514930725, + "epoch": 0.75, + "learning_rate": 4.8624228313957937e-05, + "loss": 0.2482, + "step": 785, + "task_loss": 0.1598002314567566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.725431663368575, + "compression_loss": 0.0, + "distillation_loss": 0.2370847761631012, + "epoch": 0.75, + "learning_rate": 4.862074020346664e-05, + "loss": 0.2268, + "step": 786, + "task_loss": 0.1346910297870636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7254609441286979, + "compression_loss": 0.0, + "distillation_loss": 0.4882653057575226, + "epoch": 0.75, + "learning_rate": 4.8617247802154134e-05, + "loss": 0.4718, + "step": 787, + "task_loss": 0.323346883058548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7254902172226969, + "compression_loss": 0.0, + "distillation_loss": 0.5949300527572632, + "epoch": 0.75, + "learning_rate": 4.861375111065482e-05, + "loss": 0.5811, + "step": 788, + "task_loss": 0.4562370777130127 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7255194826515758, + "compression_loss": 0.0, + "distillation_loss": 0.5340683460235596, + "epoch": 0.75, + "learning_rate": 4.861025012960389e-05, + "loss": 0.5078, + "step": 789, + "task_loss": 0.27188482880592346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7255487404163383, + "compression_loss": 0.0, + "distillation_loss": 0.23920854926109314, + "epoch": 0.75, + "learning_rate": 4.8606744859637316e-05, + "loss": 0.2273, + "step": 790, + "task_loss": 0.11962777376174927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.725577990517988, + "compression_loss": 0.0, + "distillation_loss": 0.25565600395202637, + "epoch": 0.75, + "learning_rate": 4.8603235301391844e-05, + "loss": 0.2569, + "step": 791, + "task_loss": 0.26804494857788086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7256072329575285, + "compression_loss": 0.0, + "distillation_loss": 0.2217859923839569, + "epoch": 0.75, + "learning_rate": 4.859972145550501e-05, + "loss": 0.2133, + "step": 792, + "task_loss": 0.13717922568321228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7256364677359637, + "compression_loss": 0.0, + "distillation_loss": 0.24465468525886536, + "epoch": 0.75, + "learning_rate": 4.859620332261512e-05, + "loss": 0.2358, + "step": 793, + "task_loss": 0.15646812319755554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7256656948542971, + "compression_loss": 0.0, + "distillation_loss": 0.23842063546180725, + "epoch": 0.75, + "learning_rate": 4.8592680903361247e-05, + "loss": 0.2273, + "step": 794, + "task_loss": 0.12698203325271606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7256949143135324, + "compression_loss": 0.0, + "distillation_loss": 0.26218464970588684, + "epoch": 0.75, + "learning_rate": 4.858915419838327e-05, + "loss": 0.2558, + "step": 795, + "task_loss": 0.19839191436767578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7257241261146734, + "compression_loss": 0.0, + "distillation_loss": 0.5854094624519348, + "epoch": 0.76, + "learning_rate": 4.8585623208321825e-05, + "loss": 0.5535, + "step": 796, + "task_loss": 0.2661326229572296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7257533302587237, + "compression_loss": 0.0, + "distillation_loss": 0.18930557370185852, + "epoch": 0.76, + "learning_rate": 4.858208793381833e-05, + "loss": 0.1816, + "step": 797, + "task_loss": 0.11239316314458847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.725782526746687, + "compression_loss": 0.0, + "distillation_loss": 0.35272884368896484, + "epoch": 0.76, + "learning_rate": 4.8578548375514995e-05, + "loss": 0.3343, + "step": 798, + "task_loss": 0.16825662553310394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7258117155795669, + "compression_loss": 0.0, + "distillation_loss": 0.6028088331222534, + "epoch": 0.76, + "learning_rate": 4.8575004534054794e-05, + "loss": 0.5743, + "step": 799, + "task_loss": 0.31754568219184875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7258408967583673, + "compression_loss": 0.0, + "distillation_loss": 0.16018742322921753, + "epoch": 0.76, + "learning_rate": 4.8571456410081474e-05, + "loss": 0.1666, + "step": 800, + "task_loss": 0.22434723377227783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7258700702840917, + "compression_loss": 0.0, + "distillation_loss": 0.346615195274353, + "epoch": 0.76, + "learning_rate": 4.856790400423958e-05, + "loss": 0.3398, + "step": 801, + "task_loss": 0.2779731750488281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7258992361577438, + "compression_loss": 0.0, + "distillation_loss": 0.40023940801620483, + "epoch": 0.76, + "learning_rate": 4.856434731717442e-05, + "loss": 0.388, + "step": 802, + "task_loss": 0.27803096175193787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7259283943803273, + "compression_loss": 0.0, + "distillation_loss": 0.213174507021904, + "epoch": 0.76, + "learning_rate": 4.8560786349532075e-05, + "loss": 0.199, + "step": 803, + "task_loss": 0.07103019952774048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7259575449528458, + "compression_loss": 0.0, + "distillation_loss": 0.15435612201690674, + "epoch": 0.76, + "learning_rate": 4.855722110195943e-05, + "loss": 0.145, + "step": 804, + "task_loss": 0.06065506860613823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7259866878763032, + "compression_loss": 0.0, + "distillation_loss": 0.20377478003501892, + "epoch": 0.76, + "learning_rate": 4.8553651575104114e-05, + "loss": 0.1985, + "step": 805, + "task_loss": 0.15101395547389984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.726015823151703, + "compression_loss": 0.0, + "distillation_loss": 0.3351978063583374, + "epoch": 0.77, + "learning_rate": 4.8550077769614554e-05, + "loss": 0.3193, + "step": 806, + "task_loss": 0.17666833102703094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7260449507800489, + "compression_loss": 0.0, + "distillation_loss": 0.2128250151872635, + "epoch": 0.77, + "learning_rate": 4.8546499686139944e-05, + "loss": 0.1992, + "step": 807, + "task_loss": 0.07669594138860703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7260740707623446, + "compression_loss": 0.0, + "distillation_loss": 0.22992324829101562, + "epoch": 0.77, + "learning_rate": 4.854291732533027e-05, + "loss": 0.2204, + "step": 808, + "task_loss": 0.13490459322929382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.726103183099594, + "compression_loss": 0.0, + "distillation_loss": 0.1448119878768921, + "epoch": 0.77, + "learning_rate": 4.853933068783628e-05, + "loss": 0.1417, + "step": 809, + "task_loss": 0.11348484456539154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7261322877928004, + "compression_loss": 0.0, + "distillation_loss": 0.16505393385887146, + "epoch": 0.77, + "learning_rate": 4.853573977430951e-05, + "loss": 0.1539, + "step": 810, + "task_loss": 0.053132861852645874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7261613848429677, + "compression_loss": 0.0, + "distillation_loss": 0.15083172917366028, + "epoch": 0.77, + "learning_rate": 4.8532144585402254e-05, + "loss": 0.1491, + "step": 811, + "task_loss": 0.13310600817203522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7261904742510995, + "compression_loss": 0.0, + "distillation_loss": 0.29990440607070923, + "epoch": 0.77, + "learning_rate": 4.85285451217676e-05, + "loss": 0.2841, + "step": 812, + "task_loss": 0.1421201378107071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7262195560181995, + "compression_loss": 0.0, + "distillation_loss": 0.20483794808387756, + "epoch": 0.77, + "learning_rate": 4.8524941384059415e-05, + "loss": 0.1949, + "step": 813, + "task_loss": 0.10570985078811646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7262486301452715, + "compression_loss": 0.0, + "distillation_loss": 0.152552530169487, + "epoch": 0.77, + "learning_rate": 4.8521333372932326e-05, + "loss": 0.1494, + "step": 814, + "task_loss": 0.12140922248363495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7262776966333191, + "compression_loss": 0.0, + "distillation_loss": 0.2609613537788391, + "epoch": 0.77, + "learning_rate": 4.851772108904175e-05, + "loss": 0.2615, + "step": 815, + "task_loss": 0.2658497095108032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7263067554833459, + "compression_loss": 0.0, + "distillation_loss": 0.07736967504024506, + "epoch": 0.77, + "learning_rate": 4.851410453304388e-05, + "loss": 0.0738, + "step": 816, + "task_loss": 0.041510533541440964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7263358066963557, + "compression_loss": 0.0, + "distillation_loss": 0.17229236662387848, + "epoch": 0.78, + "learning_rate": 4.851048370559567e-05, + "loss": 0.1659, + "step": 817, + "task_loss": 0.10811686515808105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7263648502733522, + "compression_loss": 0.0, + "distillation_loss": 0.15134498476982117, + "epoch": 0.78, + "learning_rate": 4.850685860735487e-05, + "loss": 0.1597, + "step": 818, + "task_loss": 0.23519474267959595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7263938862153388, + "compression_loss": 0.0, + "distillation_loss": 0.42961549758911133, + "epoch": 0.78, + "learning_rate": 4.850322923898e-05, + "loss": 0.408, + "step": 819, + "task_loss": 0.21310321986675262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7264229145233196, + "compression_loss": 0.0, + "distillation_loss": 0.15049859881401062, + "epoch": 0.78, + "learning_rate": 4.8499595601130337e-05, + "loss": 0.1421, + "step": 820, + "task_loss": 0.06645572930574417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.726451935198298, + "compression_loss": 0.0, + "distillation_loss": 0.1578487902879715, + "epoch": 0.78, + "learning_rate": 4.849595769446596e-05, + "loss": 0.1482, + "step": 821, + "task_loss": 0.06112413853406906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7264809482412778, + "compression_loss": 0.0, + "distillation_loss": 0.26902398467063904, + "epoch": 0.78, + "learning_rate": 4.849231551964771e-05, + "loss": 0.2534, + "step": 822, + "task_loss": 0.1128850132226944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7265099536532627, + "compression_loss": 0.0, + "distillation_loss": 0.2599482536315918, + "epoch": 0.78, + "learning_rate": 4.848866907733721e-05, + "loss": 0.2607, + "step": 823, + "task_loss": 0.26737767457962036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7265389514352562, + "compression_loss": 0.0, + "distillation_loss": 0.4169207513332367, + "epoch": 0.78, + "learning_rate": 4.848501836819684e-05, + "loss": 0.4028, + "step": 824, + "task_loss": 0.2758685052394867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7265679415882622, + "compression_loss": 0.0, + "distillation_loss": 0.09598681330680847, + "epoch": 0.78, + "learning_rate": 4.848136339288979e-05, + "loss": 0.0934, + "step": 825, + "task_loss": 0.06983894854784012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7265969241132842, + "compression_loss": 0.0, + "distillation_loss": 0.7514071464538574, + "epoch": 0.78, + "learning_rate": 4.8477704152079984e-05, + "loss": 0.7297, + "step": 826, + "task_loss": 0.5340147614479065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7266258990113261, + "compression_loss": 0.0, + "distillation_loss": 0.41649430990219116, + "epoch": 0.79, + "learning_rate": 4.8474040646432153e-05, + "loss": 0.4065, + "step": 827, + "task_loss": 0.31665799021720886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7266548662833914, + "compression_loss": 0.0, + "distillation_loss": 0.2038126438856125, + "epoch": 0.79, + "learning_rate": 4.8470372876611784e-05, + "loss": 0.1918, + "step": 828, + "task_loss": 0.08404532074928284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7266838259304838, + "compression_loss": 0.0, + "distillation_loss": 0.09588038921356201, + "epoch": 0.79, + "learning_rate": 4.846670084328515e-05, + "loss": 0.109, + "step": 829, + "task_loss": 0.22751304507255554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7267127779536071, + "compression_loss": 0.0, + "distillation_loss": 0.44214335083961487, + "epoch": 0.79, + "learning_rate": 4.846302454711929e-05, + "loss": 0.4217, + "step": 830, + "task_loss": 0.23792800307273865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7267417223537649, + "compression_loss": 0.0, + "distillation_loss": 0.23889368772506714, + "epoch": 0.79, + "learning_rate": 4.845934398878202e-05, + "loss": 0.2285, + "step": 831, + "task_loss": 0.13461318612098694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7267706591319608, + "compression_loss": 0.0, + "distillation_loss": 0.23278114199638367, + "epoch": 0.79, + "learning_rate": 4.845565916894193e-05, + "loss": 0.2227, + "step": 832, + "task_loss": 0.13188397884368896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7267995882891986, + "compression_loss": 0.0, + "distillation_loss": 0.3519028425216675, + "epoch": 0.79, + "learning_rate": 4.8451970088268396e-05, + "loss": 0.344, + "step": 833, + "task_loss": 0.27317169308662415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.726828509826482, + "compression_loss": 0.0, + "distillation_loss": 0.19134777784347534, + "epoch": 0.79, + "learning_rate": 4.8448276747431545e-05, + "loss": 0.1843, + "step": 834, + "task_loss": 0.12132743000984192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7268574237448145, + "compression_loss": 0.0, + "distillation_loss": 0.5469926595687866, + "epoch": 0.79, + "learning_rate": 4.84445791471023e-05, + "loss": 0.5193, + "step": 835, + "task_loss": 0.27034249901771545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7268863300452001, + "compression_loss": 0.0, + "distillation_loss": 0.22652700543403625, + "epoch": 0.79, + "learning_rate": 4.8440877287952336e-05, + "loss": 0.2205, + "step": 836, + "task_loss": 0.16659414768218994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7269152287286422, + "compression_loss": 0.0, + "distillation_loss": 0.1271471381187439, + "epoch": 0.79, + "learning_rate": 4.8437171170654125e-05, + "loss": 0.1293, + "step": 837, + "task_loss": 0.1484624445438385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7269441197961446, + "compression_loss": 0.0, + "distillation_loss": 0.2708894908428192, + "epoch": 0.8, + "learning_rate": 4.843346079588089e-05, + "loss": 0.257, + "step": 838, + "task_loss": 0.132267564535141 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7269730032487108, + "compression_loss": 0.0, + "distillation_loss": 0.3023916482925415, + "epoch": 0.8, + "learning_rate": 4.842974616430665e-05, + "loss": 0.2919, + "step": 839, + "task_loss": 0.197329580783844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7270018790873449, + "compression_loss": 0.0, + "distillation_loss": 0.30287623405456543, + "epoch": 0.8, + "learning_rate": 4.842602727660618e-05, + "loss": 0.3182, + "step": 840, + "task_loss": 0.4560437798500061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7270307473130502, + "compression_loss": 0.0, + "distillation_loss": 0.23289792239665985, + "epoch": 0.8, + "learning_rate": 4.842230413345503e-05, + "loss": 0.2203, + "step": 841, + "task_loss": 0.10670986026525497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7270596079268306, + "compression_loss": 0.0, + "distillation_loss": 0.1355813890695572, + "epoch": 0.8, + "learning_rate": 4.8418576735529535e-05, + "loss": 0.1275, + "step": 842, + "task_loss": 0.054461102932691574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7270884609296895, + "compression_loss": 0.0, + "distillation_loss": 0.20015108585357666, + "epoch": 0.8, + "learning_rate": 4.841484508350679e-05, + "loss": 0.194, + "step": 843, + "task_loss": 0.13830646872520447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7271173063226308, + "compression_loss": 0.0, + "distillation_loss": 0.2658821940422058, + "epoch": 0.8, + "learning_rate": 4.841110917806467e-05, + "loss": 0.2551, + "step": 844, + "task_loss": 0.15778332948684692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7271461441066583, + "compression_loss": 0.0, + "distillation_loss": 0.35619670152664185, + "epoch": 0.8, + "learning_rate": 4.840736901988182e-05, + "loss": 0.3489, + "step": 845, + "task_loss": 0.2832150459289551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7271749742827754, + "compression_loss": 0.0, + "distillation_loss": 0.35258805751800537, + "epoch": 0.8, + "learning_rate": 4.840362460963765e-05, + "loss": 0.3371, + "step": 846, + "task_loss": 0.19759327173233032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7272037968519861, + "compression_loss": 0.0, + "distillation_loss": 0.27292773127555847, + "epoch": 0.8, + "learning_rate": 4.8399875948012355e-05, + "loss": 0.2561, + "step": 847, + "task_loss": 0.10433943569660187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7272326118152938, + "compression_loss": 0.0, + "distillation_loss": 0.44316327571868896, + "epoch": 0.81, + "learning_rate": 4.8396123035686906e-05, + "loss": 0.4312, + "step": 848, + "task_loss": 0.323985755443573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7272614191737022, + "compression_loss": 0.0, + "distillation_loss": 0.3451952636241913, + "epoch": 0.81, + "learning_rate": 4.839236587334303e-05, + "loss": 0.3296, + "step": 849, + "task_loss": 0.1894971877336502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7272902189282151, + "compression_loss": 0.0, + "distillation_loss": 0.3476291000843048, + "epoch": 0.81, + "learning_rate": 4.8388604461663236e-05, + "loss": 0.3311, + "step": 850, + "task_loss": 0.18212732672691345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7273190110798362, + "compression_loss": 0.0, + "distillation_loss": 0.19594484567642212, + "epoch": 0.81, + "learning_rate": 4.838483880133079e-05, + "loss": 0.1966, + "step": 851, + "task_loss": 0.20298801362514496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7273477956295691, + "compression_loss": 0.0, + "distillation_loss": 0.2919715940952301, + "epoch": 0.81, + "learning_rate": 4.8381068893029766e-05, + "loss": 0.2813, + "step": 852, + "task_loss": 0.18507151305675507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7273765725784175, + "compression_loss": 0.0, + "distillation_loss": 0.2512668967247009, + "epoch": 0.81, + "learning_rate": 4.837729473744497e-05, + "loss": 0.2501, + "step": 853, + "task_loss": 0.2395791858434677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7274053419273852, + "compression_loss": 0.0, + "distillation_loss": 0.3293880224227905, + "epoch": 0.81, + "learning_rate": 4.8373516335261994e-05, + "loss": 0.3175, + "step": 854, + "task_loss": 0.21075962483882904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7274341036774756, + "compression_loss": 0.0, + "distillation_loss": 0.30303874611854553, + "epoch": 0.81, + "learning_rate": 4.8369733687167204e-05, + "loss": 0.2945, + "step": 855, + "task_loss": 0.2173597514629364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7274628578296926, + "compression_loss": 0.0, + "distillation_loss": 0.1690731942653656, + "epoch": 0.81, + "learning_rate": 4.836594679384775e-05, + "loss": 0.1559, + "step": 856, + "task_loss": 0.03761624917387962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7274916043850399, + "compression_loss": 0.0, + "distillation_loss": 0.3940788507461548, + "epoch": 0.81, + "learning_rate": 4.836215565599152e-05, + "loss": 0.383, + "step": 857, + "task_loss": 0.2832016944885254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7275203433445211, + "compression_loss": 0.0, + "distillation_loss": 0.49104076623916626, + "epoch": 0.81, + "learning_rate": 4.835836027428722e-05, + "loss": 0.4723, + "step": 858, + "task_loss": 0.3033701777458191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7275490747091399, + "compression_loss": 0.0, + "distillation_loss": 0.34213539958000183, + "epoch": 0.82, + "learning_rate": 4.8354560649424264e-05, + "loss": 0.3263, + "step": 859, + "task_loss": 0.18411041796207428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7275777984798999, + "compression_loss": 0.0, + "distillation_loss": 0.10528973489999771, + "epoch": 0.82, + "learning_rate": 4.8350756782092894e-05, + "loss": 0.1246, + "step": 860, + "task_loss": 0.2985772490501404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7276065146578049, + "compression_loss": 0.0, + "distillation_loss": 0.28886890411376953, + "epoch": 0.82, + "learning_rate": 4.8346948672984096e-05, + "loss": 0.2806, + "step": 861, + "task_loss": 0.20596402883529663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7276352232438587, + "compression_loss": 0.0, + "distillation_loss": 0.24927183985710144, + "epoch": 0.82, + "learning_rate": 4.8343136322789626e-05, + "loss": 0.2412, + "step": 862, + "task_loss": 0.16882850229740143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7276639242390647, + "compression_loss": 0.0, + "distillation_loss": 0.48319730162620544, + "epoch": 0.82, + "learning_rate": 4.8339319732202024e-05, + "loss": 0.4605, + "step": 863, + "task_loss": 0.2564105689525604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7276926176444267, + "compression_loss": 0.0, + "distillation_loss": 0.272256463766098, + "epoch": 0.82, + "learning_rate": 4.83354989019146e-05, + "loss": 0.261, + "step": 864, + "task_loss": 0.16006284952163696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7277213034609484, + "compression_loss": 0.0, + "distillation_loss": 0.43346107006073, + "epoch": 0.82, + "learning_rate": 4.83316738326214e-05, + "loss": 0.4201, + "step": 865, + "task_loss": 0.30030304193496704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7277499816896336, + "compression_loss": 0.0, + "distillation_loss": 0.2226218283176422, + "epoch": 0.82, + "learning_rate": 4.832784452501729e-05, + "loss": 0.2106, + "step": 866, + "task_loss": 0.1027415320277214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7277786523314856, + "compression_loss": 0.0, + "distillation_loss": 0.4321751892566681, + "epoch": 0.82, + "learning_rate": 4.8324010979797875e-05, + "loss": 0.4115, + "step": 867, + "task_loss": 0.22510364651679993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7278073153875085, + "compression_loss": 0.0, + "distillation_loss": 0.4015240967273712, + "epoch": 0.82, + "learning_rate": 4.8320173197659534e-05, + "loss": 0.3924, + "step": 868, + "task_loss": 0.310598224401474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7278359708587059, + "compression_loss": 0.0, + "distillation_loss": 0.1309269368648529, + "epoch": 0.83, + "learning_rate": 4.831633117929942e-05, + "loss": 0.1323, + "step": 869, + "task_loss": 0.1442694216966629 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7278646187460813, + "compression_loss": 0.0, + "distillation_loss": 0.34364479780197144, + "epoch": 0.83, + "learning_rate": 4.831248492541545e-05, + "loss": 0.3322, + "step": 870, + "task_loss": 0.2290574610233307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7278932590506385, + "compression_loss": 0.0, + "distillation_loss": 0.4297097623348236, + "epoch": 0.83, + "learning_rate": 4.830863443670632e-05, + "loss": 0.4357, + "step": 871, + "task_loss": 0.48918217420578003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7279218917733812, + "compression_loss": 0.0, + "distillation_loss": 0.3155050277709961, + "epoch": 0.83, + "learning_rate": 4.8304779713871495e-05, + "loss": 0.3033, + "step": 872, + "task_loss": 0.19333837926387787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7279505169153131, + "compression_loss": 0.0, + "distillation_loss": 0.19998089969158173, + "epoch": 0.83, + "learning_rate": 4.83009207576112e-05, + "loss": 0.1878, + "step": 873, + "task_loss": 0.07784054428339005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7279791344774377, + "compression_loss": 0.0, + "distillation_loss": 0.35912150144577026, + "epoch": 0.83, + "learning_rate": 4.829705756862642e-05, + "loss": 0.3322, + "step": 874, + "task_loss": 0.09007196873426437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7280077444607589, + "compression_loss": 0.0, + "distillation_loss": 0.42145901918411255, + "epoch": 0.83, + "learning_rate": 4.829319014761894e-05, + "loss": 0.4086, + "step": 875, + "task_loss": 0.29324570298194885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7280363468662803, + "compression_loss": 0.0, + "distillation_loss": 0.5021111369132996, + "epoch": 0.83, + "learning_rate": 4.828931849529129e-05, + "loss": 0.4807, + "step": 876, + "task_loss": 0.28812965750694275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7280649416950056, + "compression_loss": 0.0, + "distillation_loss": 0.33752530813217163, + "epoch": 0.83, + "learning_rate": 4.8285442612346774e-05, + "loss": 0.318, + "step": 877, + "task_loss": 0.14252959191799164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7280935289479383, + "compression_loss": 0.0, + "distillation_loss": 0.24429580569267273, + "epoch": 0.83, + "learning_rate": 4.828156249948946e-05, + "loss": 0.2482, + "step": 878, + "task_loss": 0.2833808660507202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7281221086260824, + "compression_loss": 0.0, + "distillation_loss": 0.16154003143310547, + "epoch": 0.83, + "learning_rate": 4.827767815742419e-05, + "loss": 0.1566, + "step": 879, + "task_loss": 0.11249750107526779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7281506807304414, + "compression_loss": 0.0, + "distillation_loss": 0.2341882586479187, + "epoch": 0.84, + "learning_rate": 4.8273789586856574e-05, + "loss": 0.2249, + "step": 880, + "task_loss": 0.14081409573554993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.728179245262019, + "compression_loss": 0.0, + "distillation_loss": 0.22025887668132782, + "epoch": 0.84, + "learning_rate": 4.8269896788493e-05, + "loss": 0.2063, + "step": 881, + "task_loss": 0.0802309438586235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7282078022218189, + "compression_loss": 0.0, + "distillation_loss": 0.2393018901348114, + "epoch": 0.84, + "learning_rate": 4.8265999763040603e-05, + "loss": 0.2291, + "step": 882, + "task_loss": 0.13759064674377441 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7282363516108448, + "compression_loss": 0.0, + "distillation_loss": 0.22338096797466278, + "epoch": 0.84, + "learning_rate": 4.8262098511207295e-05, + "loss": 0.2188, + "step": 883, + "task_loss": 0.17738208174705505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7282648934301004, + "compression_loss": 0.0, + "distillation_loss": 0.250945508480072, + "epoch": 0.84, + "learning_rate": 4.825819303370177e-05, + "loss": 0.2357, + "step": 884, + "task_loss": 0.0986076220870018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7282934276805891, + "compression_loss": 0.0, + "distillation_loss": 0.32160013914108276, + "epoch": 0.84, + "learning_rate": 4.8254283331233464e-05, + "loss": 0.3065, + "step": 885, + "task_loss": 0.1702209860086441 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.728321954363315, + "compression_loss": 0.0, + "distillation_loss": 0.13798242807388306, + "epoch": 0.84, + "learning_rate": 4.825036940451259e-05, + "loss": 0.1329, + "step": 886, + "task_loss": 0.08686836808919907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7283504734792816, + "compression_loss": 0.0, + "distillation_loss": 0.28318336606025696, + "epoch": 0.84, + "learning_rate": 4.8246451254250145e-05, + "loss": 0.2692, + "step": 887, + "task_loss": 0.14343897998332977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7283789850294926, + "compression_loss": 0.0, + "distillation_loss": 0.31278300285339355, + "epoch": 0.84, + "learning_rate": 4.8242528881157866e-05, + "loss": 0.306, + "step": 888, + "task_loss": 0.24526852369308472 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7284074890149516, + "compression_loss": 0.0, + "distillation_loss": 0.5371044874191284, + "epoch": 0.84, + "learning_rate": 4.823860228594829e-05, + "loss": 0.5197, + "step": 889, + "task_loss": 0.36329126358032227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7284359854366623, + "compression_loss": 0.0, + "distillation_loss": 0.19384345412254333, + "epoch": 0.85, + "learning_rate": 4.823467146933468e-05, + "loss": 0.1807, + "step": 890, + "task_loss": 0.06265727430582047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7284644742956286, + "compression_loss": 0.0, + "distillation_loss": 0.37346696853637695, + "epoch": 0.85, + "learning_rate": 4.823073643203111e-05, + "loss": 0.3685, + "step": 891, + "task_loss": 0.3242899179458618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7284929555928539, + "compression_loss": 0.0, + "distillation_loss": 0.4346903860569, + "epoch": 0.85, + "learning_rate": 4.822679717475237e-05, + "loss": 0.4304, + "step": 892, + "task_loss": 0.39173489809036255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7285214293293419, + "compression_loss": 0.0, + "distillation_loss": 0.36973273754119873, + "epoch": 0.85, + "learning_rate": 4.8222853698214076e-05, + "loss": 0.3513, + "step": 893, + "task_loss": 0.18498845398426056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7285498955060965, + "compression_loss": 0.0, + "distillation_loss": 0.3774262070655823, + "epoch": 0.85, + "learning_rate": 4.8218906003132555e-05, + "loss": 0.3675, + "step": 894, + "task_loss": 0.278170645236969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7285783541241213, + "compression_loss": 0.0, + "distillation_loss": 0.43660831451416016, + "epoch": 0.85, + "learning_rate": 4.8214954090224946e-05, + "loss": 0.4236, + "step": 895, + "task_loss": 0.3061750829219818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7286068051844199, + "compression_loss": 0.0, + "distillation_loss": 0.2611873149871826, + "epoch": 0.85, + "learning_rate": 4.8210997960209114e-05, + "loss": 0.2515, + "step": 896, + "task_loss": 0.16423273086547852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7286352486879959, + "compression_loss": 0.0, + "distillation_loss": 0.2831340730190277, + "epoch": 0.85, + "learning_rate": 4.8207037613803715e-05, + "loss": 0.2735, + "step": 897, + "task_loss": 0.18677663803100586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7286636846358533, + "compression_loss": 0.0, + "distillation_loss": 0.38769102096557617, + "epoch": 0.85, + "learning_rate": 4.820307305172818e-05, + "loss": 0.3674, + "step": 898, + "task_loss": 0.1845768690109253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7286921130289954, + "compression_loss": 0.0, + "distillation_loss": 0.28008437156677246, + "epoch": 0.85, + "learning_rate": 4.8199104274702666e-05, + "loss": 0.265, + "step": 899, + "task_loss": 0.1291656196117401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7287205338684262, + "compression_loss": 0.0, + "distillation_loss": 0.12043634802103043, + "epoch": 0.85, + "learning_rate": 4.819513128344814e-05, + "loss": 0.1209, + "step": 900, + "task_loss": 0.12554529309272766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7287489471551492, + "compression_loss": 0.0, + "distillation_loss": 0.2257360816001892, + "epoch": 0.86, + "learning_rate": 4.8191154078686306e-05, + "loss": 0.2282, + "step": 901, + "task_loss": 0.25028055906295776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7287773528901681, + "compression_loss": 0.0, + "distillation_loss": 0.2224644273519516, + "epoch": 0.86, + "learning_rate": 4.8187172661139636e-05, + "loss": 0.2194, + "step": 902, + "task_loss": 0.19155338406562805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7288057510744866, + "compression_loss": 0.0, + "distillation_loss": 0.1225384920835495, + "epoch": 0.86, + "learning_rate": 4.818318703153139e-05, + "loss": 0.1325, + "step": 903, + "task_loss": 0.2224058359861374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7288341417091085, + "compression_loss": 0.0, + "distillation_loss": 0.5352866649627686, + "epoch": 0.86, + "learning_rate": 4.817919719058557e-05, + "loss": 0.528, + "step": 904, + "task_loss": 0.4623543620109558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7288625247950373, + "compression_loss": 0.0, + "distillation_loss": 0.2698153257369995, + "epoch": 0.86, + "learning_rate": 4.8175203139026934e-05, + "loss": 0.2603, + "step": 905, + "task_loss": 0.17438843846321106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7288909003332769, + "compression_loss": 0.0, + "distillation_loss": 0.7470159530639648, + "epoch": 0.86, + "learning_rate": 4.817120487758104e-05, + "loss": 0.712, + "step": 906, + "task_loss": 0.39666780829429626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7289192683248307, + "compression_loss": 0.0, + "distillation_loss": 0.22724510729312897, + "epoch": 0.86, + "learning_rate": 4.81672024069742e-05, + "loss": 0.2208, + "step": 907, + "task_loss": 0.16269180178642273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7289476287707025, + "compression_loss": 0.0, + "distillation_loss": 0.3366519808769226, + "epoch": 0.86, + "learning_rate": 4.816319572793345e-05, + "loss": 0.3336, + "step": 908, + "task_loss": 0.3060190975666046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.728975981671896, + "compression_loss": 0.0, + "distillation_loss": 0.1925545036792755, + "epoch": 0.86, + "learning_rate": 4.815918484118665e-05, + "loss": 0.18, + "step": 909, + "task_loss": 0.0666121393442154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.729004327029415, + "compression_loss": 0.0, + "distillation_loss": 0.1643257886171341, + "epoch": 0.86, + "learning_rate": 4.815516974746239e-05, + "loss": 0.151, + "step": 910, + "task_loss": 0.030893657356500626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.729032664844263, + "compression_loss": 0.0, + "distillation_loss": 0.20738086104393005, + "epoch": 0.87, + "learning_rate": 4.815115044749003e-05, + "loss": 0.1996, + "step": 911, + "task_loss": 0.12986215949058533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7290609951174438, + "compression_loss": 0.0, + "distillation_loss": 0.2494029700756073, + "epoch": 0.87, + "learning_rate": 4.814712694199969e-05, + "loss": 0.2375, + "step": 912, + "task_loss": 0.13066712021827698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.729089317849961, + "compression_loss": 0.0, + "distillation_loss": 0.24174784123897552, + "epoch": 0.87, + "learning_rate": 4.814309923172227e-05, + "loss": 0.2271, + "step": 913, + "task_loss": 0.09568721055984497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7291176330428183, + "compression_loss": 0.0, + "distillation_loss": 0.4642338752746582, + "epoch": 0.87, + "learning_rate": 4.81390673173894e-05, + "loss": 0.4392, + "step": 914, + "task_loss": 0.2140159010887146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7291459406970194, + "compression_loss": 0.0, + "distillation_loss": 0.28287458419799805, + "epoch": 0.87, + "learning_rate": 4.8135031199733524e-05, + "loss": 0.2737, + "step": 915, + "task_loss": 0.19124047458171844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7291742408135681, + "compression_loss": 0.0, + "distillation_loss": 0.29212427139282227, + "epoch": 0.87, + "learning_rate": 4.813099087948781e-05, + "loss": 0.281, + "step": 916, + "task_loss": 0.18083734810352325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7292025333934679, + "compression_loss": 0.0, + "distillation_loss": 0.2220340222120285, + "epoch": 0.87, + "learning_rate": 4.812694635738621e-05, + "loss": 0.2106, + "step": 917, + "task_loss": 0.10801569372415543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7292308184377224, + "compression_loss": 0.0, + "distillation_loss": 0.1814860701560974, + "epoch": 0.87, + "learning_rate": 4.812289763416341e-05, + "loss": 0.1735, + "step": 918, + "task_loss": 0.10150802880525589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7292590959473356, + "compression_loss": 0.0, + "distillation_loss": 0.1173454150557518, + "epoch": 0.87, + "learning_rate": 4.81188447105549e-05, + "loss": 0.11, + "step": 919, + "task_loss": 0.0437774695456028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7292873659233109, + "compression_loss": 0.0, + "distillation_loss": 0.28012198209762573, + "epoch": 0.87, + "learning_rate": 4.811478758729691e-05, + "loss": 0.2636, + "step": 920, + "task_loss": 0.11441508680582047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7293156283666522, + "compression_loss": 0.0, + "distillation_loss": 0.29111599922180176, + "epoch": 0.87, + "learning_rate": 4.811072626512642e-05, + "loss": 0.2741, + "step": 921, + "task_loss": 0.12139546126127243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.729343883278363, + "compression_loss": 0.0, + "distillation_loss": 0.36805009841918945, + "epoch": 0.88, + "learning_rate": 4.810666074478121e-05, + "loss": 0.3586, + "step": 922, + "task_loss": 0.2733895480632782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7293721306594471, + "compression_loss": 0.0, + "distillation_loss": 0.2743581533432007, + "epoch": 0.88, + "learning_rate": 4.8102591026999796e-05, + "loss": 0.2591, + "step": 923, + "task_loss": 0.12169023603200912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7294003705109081, + "compression_loss": 0.0, + "distillation_loss": 0.12222736328840256, + "epoch": 0.88, + "learning_rate": 4.8098517112521456e-05, + "loss": 0.1283, + "step": 924, + "task_loss": 0.18253932893276215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7294286028337497, + "compression_loss": 0.0, + "distillation_loss": 0.38871803879737854, + "epoch": 0.88, + "learning_rate": 4.8094439002086234e-05, + "loss": 0.3813, + "step": 925, + "task_loss": 0.31461140513420105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7294568276289756, + "compression_loss": 0.0, + "distillation_loss": 0.07011576741933823, + "epoch": 0.88, + "learning_rate": 4.809035669643495e-05, + "loss": 0.0841, + "step": 926, + "task_loss": 0.21029669046401978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7294850448975896, + "compression_loss": 0.0, + "distillation_loss": 0.2029143124818802, + "epoch": 0.88, + "learning_rate": 4.808627019630917e-05, + "loss": 0.1999, + "step": 927, + "task_loss": 0.17250923812389374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7295132546405951, + "compression_loss": 0.0, + "distillation_loss": 0.4461289942264557, + "epoch": 0.88, + "learning_rate": 4.808217950245122e-05, + "loss": 0.4298, + "step": 928, + "task_loss": 0.28285056352615356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7295414568589961, + "compression_loss": 0.0, + "distillation_loss": 0.2739975154399872, + "epoch": 0.88, + "learning_rate": 4.807808461560419e-05, + "loss": 0.2818, + "step": 929, + "task_loss": 0.35219091176986694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.729569651553796, + "compression_loss": 0.0, + "distillation_loss": 0.14522334933280945, + "epoch": 0.88, + "learning_rate": 4.8073985536511956e-05, + "loss": 0.1395, + "step": 930, + "task_loss": 0.08773127943277359 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7295978387259987, + "compression_loss": 0.0, + "distillation_loss": 0.1583382934331894, + "epoch": 0.88, + "learning_rate": 4.806988226591912e-05, + "loss": 0.1488, + "step": 931, + "task_loss": 0.06294950842857361 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7296260183766078, + "compression_loss": 0.0, + "distillation_loss": 0.42467373609542847, + "epoch": 0.89, + "learning_rate": 4.806577480457106e-05, + "loss": 0.4083, + "step": 932, + "task_loss": 0.26088839769363403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7296541905066269, + "compression_loss": 0.0, + "distillation_loss": 0.2598496377468109, + "epoch": 0.89, + "learning_rate": 4.8061663153213935e-05, + "loss": 0.2517, + "step": 933, + "task_loss": 0.17828907072544098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7296823551170598, + "compression_loss": 0.0, + "distillation_loss": 0.26050207018852234, + "epoch": 0.89, + "learning_rate": 4.805754731259462e-05, + "loss": 0.2466, + "step": 934, + "task_loss": 0.12179625779390335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7297105122089101, + "compression_loss": 0.0, + "distillation_loss": 0.2224908173084259, + "epoch": 0.89, + "learning_rate": 4.805342728346079e-05, + "loss": 0.212, + "step": 935, + "task_loss": 0.11794877797365189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7297386617831816, + "compression_loss": 0.0, + "distillation_loss": 0.2159128487110138, + "epoch": 0.89, + "learning_rate": 4.804930306656087e-05, + "loss": 0.2034, + "step": 936, + "task_loss": 0.090658038854599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7297668038408779, + "compression_loss": 0.0, + "distillation_loss": 0.42296963930130005, + "epoch": 0.89, + "learning_rate": 4.804517466264405e-05, + "loss": 0.4042, + "step": 937, + "task_loss": 0.2355644851922989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7297949383830027, + "compression_loss": 0.0, + "distillation_loss": 0.1279992312192917, + "epoch": 0.89, + "learning_rate": 4.8041042072460244e-05, + "loss": 0.1309, + "step": 938, + "task_loss": 0.15678860247135162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7298230654105596, + "compression_loss": 0.0, + "distillation_loss": 0.4838896095752716, + "epoch": 0.89, + "learning_rate": 4.803690529676019e-05, + "loss": 0.4677, + "step": 939, + "task_loss": 0.3221690356731415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7298511849245524, + "compression_loss": 0.0, + "distillation_loss": 0.3270692229270935, + "epoch": 0.89, + "learning_rate": 4.803276433629534e-05, + "loss": 0.3116, + "step": 940, + "task_loss": 0.17188920080661774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7298792969259846, + "compression_loss": 0.0, + "distillation_loss": 0.1887792944908142, + "epoch": 0.89, + "learning_rate": 4.802861919181793e-05, + "loss": 0.1795, + "step": 941, + "task_loss": 0.09571507573127747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7299074014158602, + "compression_loss": 0.0, + "distillation_loss": 0.33935946226119995, + "epoch": 0.89, + "learning_rate": 4.802446986408093e-05, + "loss": 0.3392, + "step": 942, + "task_loss": 0.33791565895080566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7299354983951827, + "compression_loss": 0.0, + "distillation_loss": 0.33818504214286804, + "epoch": 0.9, + "learning_rate": 4.8020316353838095e-05, + "loss": 0.3372, + "step": 943, + "task_loss": 0.3283797800540924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7299635878649556, + "compression_loss": 0.0, + "distillation_loss": 0.10457701981067657, + "epoch": 0.9, + "learning_rate": 4.8016158661843926e-05, + "loss": 0.1072, + "step": 944, + "task_loss": 0.13106907904148102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7299916698261829, + "compression_loss": 0.0, + "distillation_loss": 0.1351221650838852, + "epoch": 0.9, + "learning_rate": 4.8011996788853686e-05, + "loss": 0.1263, + "step": 945, + "task_loss": 0.04674810171127319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7300197442798682, + "compression_loss": 0.0, + "distillation_loss": 0.3631080389022827, + "epoch": 0.9, + "learning_rate": 4.80078307356234e-05, + "loss": 0.3498, + "step": 946, + "task_loss": 0.2304990440607071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.730047811227015, + "compression_loss": 0.0, + "distillation_loss": 0.42994534969329834, + "epoch": 0.9, + "learning_rate": 4.800366050290986e-05, + "loss": 0.4158, + "step": 947, + "task_loss": 0.28830981254577637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7300758706686271, + "compression_loss": 0.0, + "distillation_loss": 0.3227723240852356, + "epoch": 0.9, + "learning_rate": 4.799948609147061e-05, + "loss": 0.3099, + "step": 948, + "task_loss": 0.19404058158397675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7301039226057082, + "compression_loss": 0.0, + "distillation_loss": 0.20503823459148407, + "epoch": 0.9, + "learning_rate": 4.7995307502063936e-05, + "loss": 0.1954, + "step": 949, + "task_loss": 0.10874759405851364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7301319670392621, + "compression_loss": 0.0, + "distillation_loss": 0.3428541421890259, + "epoch": 0.9, + "learning_rate": 4.799112473544891e-05, + "loss": 0.3248, + "step": 950, + "task_loss": 0.16210930049419403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7301600039702922, + "compression_loss": 0.0, + "distillation_loss": 0.31048500537872314, + "epoch": 0.9, + "learning_rate": 4.7986937792385344e-05, + "loss": 0.2987, + "step": 951, + "task_loss": 0.19261281192302704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7301880333998025, + "compression_loss": 0.0, + "distillation_loss": 0.33201491832733154, + "epoch": 0.9, + "learning_rate": 4.798274667363383e-05, + "loss": 0.3181, + "step": 952, + "task_loss": 0.19296838343143463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7302160553287964, + "compression_loss": 0.0, + "distillation_loss": 0.4270634949207306, + "epoch": 0.91, + "learning_rate": 4.7978551379955684e-05, + "loss": 0.4057, + "step": 953, + "task_loss": 0.21361877024173737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7302440697582777, + "compression_loss": 0.0, + "distillation_loss": 0.12916311621665955, + "epoch": 0.91, + "learning_rate": 4.797435191211302e-05, + "loss": 0.1248, + "step": 954, + "task_loss": 0.08600229024887085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7302720766892501, + "compression_loss": 0.0, + "distillation_loss": 0.17904284596443176, + "epoch": 0.91, + "learning_rate": 4.797014827086869e-05, + "loss": 0.1746, + "step": 955, + "task_loss": 0.13418468832969666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7303000761227173, + "compression_loss": 0.0, + "distillation_loss": 0.12082010507583618, + "epoch": 0.91, + "learning_rate": 4.79659404569863e-05, + "loss": 0.1303, + "step": 956, + "task_loss": 0.21565213799476624 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7303280680596829, + "compression_loss": 0.0, + "distillation_loss": 0.3740869164466858, + "epoch": 0.91, + "learning_rate": 4.7961728471230214e-05, + "loss": 0.3668, + "step": 957, + "task_loss": 0.30169612169265747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7303560525011507, + "compression_loss": 0.0, + "distillation_loss": 0.2538146674633026, + "epoch": 0.91, + "learning_rate": 4.7957512314365574e-05, + "loss": 0.2491, + "step": 958, + "task_loss": 0.20712712407112122 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7303840294481242, + "compression_loss": 0.0, + "distillation_loss": 0.3629278540611267, + "epoch": 0.91, + "learning_rate": 4.7953291987158254e-05, + "loss": 0.3478, + "step": 959, + "task_loss": 0.21124333143234253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7304119989016074, + "compression_loss": 0.0, + "distillation_loss": 0.2513018846511841, + "epoch": 0.91, + "learning_rate": 4.79490674903749e-05, + "loss": 0.2443, + "step": 960, + "task_loss": 0.18155395984649658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7304399608626035, + "compression_loss": 0.0, + "distillation_loss": 0.14075066149234772, + "epoch": 0.91, + "learning_rate": 4.7944838824782916e-05, + "loss": 0.1426, + "step": 961, + "task_loss": 0.15932926535606384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7304679153321166, + "compression_loss": 0.0, + "distillation_loss": 0.25464701652526855, + "epoch": 0.91, + "learning_rate": 4.794060599115045e-05, + "loss": 0.2359, + "step": 962, + "task_loss": 0.06738609075546265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7304958623111502, + "compression_loss": 0.0, + "distillation_loss": 0.12189721316099167, + "epoch": 0.91, + "learning_rate": 4.793636899024643e-05, + "loss": 0.1161, + "step": 963, + "task_loss": 0.06351499259471893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7305238018007081, + "compression_loss": 0.0, + "distillation_loss": 0.13325002789497375, + "epoch": 0.92, + "learning_rate": 4.7932127822840516e-05, + "loss": 0.1313, + "step": 964, + "task_loss": 0.11407996714115143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7305517338017938, + "compression_loss": 0.0, + "distillation_loss": 0.13893647491931915, + "epoch": 0.92, + "learning_rate": 4.792788248970314e-05, + "loss": 0.1442, + "step": 965, + "task_loss": 0.1910799741744995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7305796583154112, + "compression_loss": 0.0, + "distillation_loss": 0.49337151646614075, + "epoch": 0.92, + "learning_rate": 4.79236329916055e-05, + "loss": 0.4772, + "step": 966, + "task_loss": 0.3313041031360626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7306075753425638, + "compression_loss": 0.0, + "distillation_loss": 0.1377095878124237, + "epoch": 0.92, + "learning_rate": 4.79193793293195e-05, + "loss": 0.1368, + "step": 967, + "task_loss": 0.12868079543113708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7306354848842553, + "compression_loss": 0.0, + "distillation_loss": 0.5649617910385132, + "epoch": 0.92, + "learning_rate": 4.791512150361788e-05, + "loss": 0.5411, + "step": 968, + "task_loss": 0.3263978660106659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7306633869414896, + "compression_loss": 0.0, + "distillation_loss": 0.19388450682163239, + "epoch": 0.92, + "learning_rate": 4.791085951527408e-05, + "loss": 0.1838, + "step": 969, + "task_loss": 0.09344847500324249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.73069128151527, + "compression_loss": 0.0, + "distillation_loss": 0.11812691390514374, + "epoch": 0.92, + "learning_rate": 4.7906593365062304e-05, + "loss": 0.1124, + "step": 970, + "task_loss": 0.06074811518192291 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7307191686066006, + "compression_loss": 0.0, + "distillation_loss": 0.2905130386352539, + "epoch": 0.92, + "learning_rate": 4.790232305375752e-05, + "loss": 0.2802, + "step": 971, + "task_loss": 0.18719345331192017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7307470482164847, + "compression_loss": 0.0, + "distillation_loss": 0.41280168294906616, + "epoch": 0.92, + "learning_rate": 4.789804858213547e-05, + "loss": 0.3943, + "step": 972, + "task_loss": 0.2281726449728012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7307749203459263, + "compression_loss": 0.0, + "distillation_loss": 0.5764849185943604, + "epoch": 0.92, + "learning_rate": 4.7893769950972605e-05, + "loss": 0.5575, + "step": 973, + "task_loss": 0.3868526816368103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7308027849959289, + "compression_loss": 0.0, + "distillation_loss": 0.3850351572036743, + "epoch": 0.92, + "learning_rate": 4.788948716104618e-05, + "loss": 0.3583, + "step": 974, + "task_loss": 0.1175268292427063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7308306421674962, + "compression_loss": 0.0, + "distillation_loss": 0.22594526410102844, + "epoch": 0.93, + "learning_rate": 4.7885200213134164e-05, + "loss": 0.2176, + "step": 975, + "task_loss": 0.1424587517976761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7308584918616319, + "compression_loss": 0.0, + "distillation_loss": 0.2664077579975128, + "epoch": 0.93, + "learning_rate": 4.788090910801532e-05, + "loss": 0.2586, + "step": 976, + "task_loss": 0.18784651160240173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7308863340793397, + "compression_loss": 0.0, + "distillation_loss": 0.24012893438339233, + "epoch": 0.93, + "learning_rate": 4.787661384646913e-05, + "loss": 0.2305, + "step": 977, + "task_loss": 0.14341062307357788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7309141688216232, + "compression_loss": 0.0, + "distillation_loss": 0.19192594289779663, + "epoch": 0.93, + "learning_rate": 4.787231442927587e-05, + "loss": 0.1862, + "step": 978, + "task_loss": 0.13485443592071533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7309419960894863, + "compression_loss": 0.0, + "distillation_loss": 0.2409208118915558, + "epoch": 0.93, + "learning_rate": 4.786801085721654e-05, + "loss": 0.2312, + "step": 979, + "task_loss": 0.14332106709480286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7309698158839324, + "compression_loss": 0.0, + "distillation_loss": 0.5161716938018799, + "epoch": 0.93, + "learning_rate": 4.78637031310729e-05, + "loss": 0.4927, + "step": 980, + "task_loss": 0.28135231137275696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7309976282059654, + "compression_loss": 0.0, + "distillation_loss": 0.3326793909072876, + "epoch": 0.93, + "learning_rate": 4.7859391251627474e-05, + "loss": 0.3267, + "step": 981, + "task_loss": 0.2727680802345276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7310254330565888, + "compression_loss": 0.0, + "distillation_loss": 0.1554919183254242, + "epoch": 0.93, + "learning_rate": 4.7855075219663535e-05, + "loss": 0.148, + "step": 982, + "task_loss": 0.08066565543413162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7310532304368065, + "compression_loss": 0.0, + "distillation_loss": 0.34702181816101074, + "epoch": 0.93, + "learning_rate": 4.785075503596511e-05, + "loss": 0.3353, + "step": 983, + "task_loss": 0.2300397753715515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.731081020347622, + "compression_loss": 0.0, + "distillation_loss": 0.07944006472826004, + "epoch": 0.93, + "learning_rate": 4.7846430701316994e-05, + "loss": 0.0734, + "step": 984, + "task_loss": 0.018551897257566452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7311088027900391, + "compression_loss": 0.0, + "distillation_loss": 0.35553303360939026, + "epoch": 0.94, + "learning_rate": 4.78421022165047e-05, + "loss": 0.3459, + "step": 985, + "task_loss": 0.25915029644966125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7311365777650615, + "compression_loss": 0.0, + "distillation_loss": 0.2604832351207733, + "epoch": 0.94, + "learning_rate": 4.783776958231453e-05, + "loss": 0.2522, + "step": 986, + "task_loss": 0.1778934746980667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7311643452736927, + "compression_loss": 0.0, + "distillation_loss": 0.4339986741542816, + "epoch": 0.94, + "learning_rate": 4.783343279953353e-05, + "loss": 0.4111, + "step": 987, + "task_loss": 0.205443874001503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7311921053169365, + "compression_loss": 0.0, + "distillation_loss": 0.09336867183446884, + "epoch": 0.94, + "learning_rate": 4.782909186894949e-05, + "loss": 0.0875, + "step": 988, + "task_loss": 0.03483529016375542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7312198578957965, + "compression_loss": 0.0, + "distillation_loss": 0.3490631878376007, + "epoch": 0.94, + "learning_rate": 4.782474679135097e-05, + "loss": 0.3363, + "step": 989, + "task_loss": 0.22176185250282288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7312476030112766, + "compression_loss": 0.0, + "distillation_loss": 0.31977570056915283, + "epoch": 0.94, + "learning_rate": 4.782039756752727e-05, + "loss": 0.3017, + "step": 990, + "task_loss": 0.13884237408638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7312753406643802, + "compression_loss": 0.0, + "distillation_loss": 0.2667671740055084, + "epoch": 0.94, + "learning_rate": 4.781604419826845e-05, + "loss": 0.2698, + "step": 991, + "task_loss": 0.2971741855144501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7313030708561112, + "compression_loss": 0.0, + "distillation_loss": 0.10828962922096252, + "epoch": 0.94, + "learning_rate": 4.781168668436532e-05, + "loss": 0.1087, + "step": 992, + "task_loss": 0.11210381239652634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7313307935874732, + "compression_loss": 0.0, + "distillation_loss": 0.3468828499317169, + "epoch": 0.94, + "learning_rate": 4.780732502660943e-05, + "loss": 0.3356, + "step": 993, + "task_loss": 0.2339598685503006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7313585088594698, + "compression_loss": 0.0, + "distillation_loss": 0.14131498336791992, + "epoch": 0.94, + "learning_rate": 4.780295922579312e-05, + "loss": 0.1407, + "step": 994, + "task_loss": 0.1348223239183426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7313862166731049, + "compression_loss": 0.0, + "distillation_loss": 0.2651304304599762, + "epoch": 0.94, + "learning_rate": 4.779858928270944e-05, + "loss": 0.253, + "step": 995, + "task_loss": 0.1437424123287201 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.731413917029382, + "compression_loss": 0.0, + "distillation_loss": 0.11093267053365707, + "epoch": 0.95, + "learning_rate": 4.7794215198152216e-05, + "loss": 0.1031, + "step": 996, + "task_loss": 0.03270239382982254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7314416099293048, + "compression_loss": 0.0, + "distillation_loss": 0.2656594514846802, + "epoch": 0.95, + "learning_rate": 4.778983697291603e-05, + "loss": 0.2697, + "step": 997, + "task_loss": 0.3064509630203247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7314692953738771, + "compression_loss": 0.0, + "distillation_loss": 0.29231274127960205, + "epoch": 0.95, + "learning_rate": 4.7785454607796195e-05, + "loss": 0.2753, + "step": 998, + "task_loss": 0.12249677628278732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7314969733641024, + "compression_loss": 0.0, + "distillation_loss": 0.15542489290237427, + "epoch": 0.95, + "learning_rate": 4.77810681035888e-05, + "loss": 0.154, + "step": 999, + "task_loss": 0.14133189618587494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7315246439009845, + "compression_loss": 0.0, + "distillation_loss": 0.27466249465942383, + "epoch": 0.95, + "learning_rate": 4.777667746109067e-05, + "loss": 0.2681, + "step": 1000, + "task_loss": 0.20898157358169556 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.9094036697247706, + "eval_loss": 0.3636666536331177, + "eval_runtime": 17.8028, + "eval_samples_per_second": 48.981, + "eval_steps_per_second": 6.123, + "step": 1000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7315523069855271, + "compression_loss": 0.0, + "distillation_loss": 0.10684286057949066, + "epoch": 0.95, + "learning_rate": 4.7772282681099377e-05, + "loss": 0.1, + "step": 1001, + "task_loss": 0.038139708340168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7315799626187338, + "compression_loss": 0.0, + "distillation_loss": 0.33934301137924194, + "epoch": 0.95, + "learning_rate": 4.7767883764413266e-05, + "loss": 0.3332, + "step": 1002, + "task_loss": 0.27778148651123047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7316076108016083, + "compression_loss": 0.0, + "distillation_loss": 0.2035553753376007, + "epoch": 0.95, + "learning_rate": 4.776348071183142e-05, + "loss": 0.1945, + "step": 1003, + "task_loss": 0.11305206269025803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7316352515351543, + "compression_loss": 0.0, + "distillation_loss": 0.1286626160144806, + "epoch": 0.95, + "learning_rate": 4.775907352415367e-05, + "loss": 0.1426, + "step": 1004, + "task_loss": 0.26838886737823486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7316628848203756, + "compression_loss": 0.0, + "distillation_loss": 0.4252236485481262, + "epoch": 0.95, + "learning_rate": 4.7754662202180606e-05, + "loss": 0.4083, + "step": 1005, + "task_loss": 0.2558348774909973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7316905106582756, + "compression_loss": 0.0, + "distillation_loss": 0.24486252665519714, + "epoch": 0.96, + "learning_rate": 4.7750246746713565e-05, + "loss": 0.2329, + "step": 1006, + "task_loss": 0.12567219138145447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7317181290498582, + "compression_loss": 0.0, + "distillation_loss": 0.30694228410720825, + "epoch": 0.96, + "learning_rate": 4.7745827158554634e-05, + "loss": 0.2926, + "step": 1007, + "task_loss": 0.1638035923242569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7317457399961271, + "compression_loss": 0.0, + "distillation_loss": 0.22996553778648376, + "epoch": 0.96, + "learning_rate": 4.774140343850666e-05, + "loss": 0.227, + "step": 1008, + "task_loss": 0.19992922246456146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.731773343498086, + "compression_loss": 0.0, + "distillation_loss": 0.17360806465148926, + "epoch": 0.96, + "learning_rate": 4.773697558737322e-05, + "loss": 0.1746, + "step": 1009, + "task_loss": 0.1831490695476532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7318009395567384, + "compression_loss": 0.0, + "distillation_loss": 0.08699658513069153, + "epoch": 0.96, + "learning_rate": 4.773254360595867e-05, + "loss": 0.082, + "step": 1010, + "task_loss": 0.03714621439576149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7318285281730881, + "compression_loss": 0.0, + "distillation_loss": 0.12660613656044006, + "epoch": 0.96, + "learning_rate": 4.77281074950681e-05, + "loss": 0.1187, + "step": 1011, + "task_loss": 0.04724828153848648 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7318561093481386, + "compression_loss": 0.0, + "distillation_loss": 0.1175093725323677, + "epoch": 0.96, + "learning_rate": 4.7723667255507334e-05, + "loss": 0.1299, + "step": 1012, + "task_loss": 0.24159134924411774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.731883683082894, + "compression_loss": 0.0, + "distillation_loss": 0.2919718027114868, + "epoch": 0.96, + "learning_rate": 4.771922288808297e-05, + "loss": 0.2807, + "step": 1013, + "task_loss": 0.17899443209171295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7319112493783576, + "compression_loss": 0.0, + "distillation_loss": 0.13135388493537903, + "epoch": 0.96, + "learning_rate": 4.771477439360235e-05, + "loss": 0.1246, + "step": 1014, + "task_loss": 0.06415353715419769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7319388082355331, + "compression_loss": 0.0, + "distillation_loss": 0.35100582242012024, + "epoch": 0.96, + "learning_rate": 4.7710321772873566e-05, + "loss": 0.348, + "step": 1015, + "task_loss": 0.3205385208129883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7319663596554244, + "compression_loss": 0.0, + "distillation_loss": 0.2460920214653015, + "epoch": 0.96, + "learning_rate": 4.770586502670546e-05, + "loss": 0.2355, + "step": 1016, + "task_loss": 0.13989022374153137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7319939036390352, + "compression_loss": 0.0, + "distillation_loss": 0.23754239082336426, + "epoch": 0.97, + "learning_rate": 4.770140415590762e-05, + "loss": 0.2337, + "step": 1017, + "task_loss": 0.19897620379924774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7320214401873689, + "compression_loss": 0.0, + "distillation_loss": 0.46301230788230896, + "epoch": 0.97, + "learning_rate": 4.769693916129039e-05, + "loss": 0.4469, + "step": 1018, + "task_loss": 0.30206912755966187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7320489693014294, + "compression_loss": 0.0, + "distillation_loss": 0.41447192430496216, + "epoch": 0.97, + "learning_rate": 4.769247004366485e-05, + "loss": 0.3952, + "step": 1019, + "task_loss": 0.22126147150993347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7320764909822203, + "compression_loss": 0.0, + "distillation_loss": 0.19669479131698608, + "epoch": 0.97, + "learning_rate": 4.768799680384283e-05, + "loss": 0.1961, + "step": 1020, + "task_loss": 0.19112606346607208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7321040052307453, + "compression_loss": 0.0, + "distillation_loss": 0.1391059011220932, + "epoch": 0.97, + "learning_rate": 4.768351944263693e-05, + "loss": 0.1306, + "step": 1021, + "task_loss": 0.05390199273824692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7321315120480081, + "compression_loss": 0.0, + "distillation_loss": 0.1612778604030609, + "epoch": 0.97, + "learning_rate": 4.767903796086048e-05, + "loss": 0.1569, + "step": 1022, + "task_loss": 0.11706437915563583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7321590114350123, + "compression_loss": 0.0, + "distillation_loss": 0.13082058727741241, + "epoch": 0.97, + "learning_rate": 4.767455235932756e-05, + "loss": 0.1235, + "step": 1023, + "task_loss": 0.05800522491335869 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7321865033927617, + "compression_loss": 0.0, + "distillation_loss": 0.07101086527109146, + "epoch": 0.97, + "learning_rate": 4.7670062638853e-05, + "loss": 0.0663, + "step": 1024, + "task_loss": 0.023550687357783318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7322139879222599, + "compression_loss": 0.0, + "distillation_loss": 0.7248010635375977, + "epoch": 0.97, + "learning_rate": 4.766556880025238e-05, + "loss": 0.6946, + "step": 1025, + "task_loss": 0.4223037362098694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7322414650245106, + "compression_loss": 0.0, + "distillation_loss": 0.18530097603797913, + "epoch": 0.97, + "learning_rate": 4.7661070844342033e-05, + "loss": 0.1791, + "step": 1026, + "task_loss": 0.12346015125513077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7322689347005176, + "compression_loss": 0.0, + "distillation_loss": 0.436127245426178, + "epoch": 0.98, + "learning_rate": 4.7656568771939024e-05, + "loss": 0.4136, + "step": 1027, + "task_loss": 0.21047565340995789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7322963969512843, + "compression_loss": 0.0, + "distillation_loss": 0.33645620942115784, + "epoch": 0.98, + "learning_rate": 4.765206258386119e-05, + "loss": 0.3199, + "step": 1028, + "task_loss": 0.17079196870326996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7323238517778147, + "compression_loss": 0.0, + "distillation_loss": 0.2837325930595398, + "epoch": 0.98, + "learning_rate": 4.7647552280927086e-05, + "loss": 0.2713, + "step": 1029, + "task_loss": 0.15971429646015167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7323512991811123, + "compression_loss": 0.0, + "distillation_loss": 0.2346368283033371, + "epoch": 0.98, + "learning_rate": 4.764303786395604e-05, + "loss": 0.2218, + "step": 1030, + "task_loss": 0.10661163181066513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7323787391621808, + "compression_loss": 0.0, + "distillation_loss": 0.26597869396209717, + "epoch": 0.98, + "learning_rate": 4.763851933376812e-05, + "loss": 0.2678, + "step": 1031, + "task_loss": 0.284251868724823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7324061717220239, + "compression_loss": 0.0, + "distillation_loss": 0.21545451879501343, + "epoch": 0.98, + "learning_rate": 4.763399669118414e-05, + "loss": 0.2012, + "step": 1032, + "task_loss": 0.07268868386745453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7324335968616453, + "compression_loss": 0.0, + "distillation_loss": 0.3080711364746094, + "epoch": 0.98, + "learning_rate": 4.762946993702565e-05, + "loss": 0.2918, + "step": 1033, + "task_loss": 0.14552035927772522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7324610145820487, + "compression_loss": 0.0, + "distillation_loss": 0.2603287100791931, + "epoch": 0.98, + "learning_rate": 4.7624939072114954e-05, + "loss": 0.2473, + "step": 1034, + "task_loss": 0.12976713478565216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7324884248842377, + "compression_loss": 0.0, + "distillation_loss": 0.28742021322250366, + "epoch": 0.98, + "learning_rate": 4.762040409727512e-05, + "loss": 0.2733, + "step": 1035, + "task_loss": 0.14594219624996185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7325158277692161, + "compression_loss": 0.0, + "distillation_loss": 0.17579105496406555, + "epoch": 0.98, + "learning_rate": 4.761586501332994e-05, + "loss": 0.1813, + "step": 1036, + "task_loss": 0.23123717308044434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7325432232379875, + "compression_loss": 0.0, + "distillation_loss": 0.1056813895702362, + "epoch": 0.98, + "learning_rate": 4.7611321821103954e-05, + "loss": 0.1001, + "step": 1037, + "task_loss": 0.04944942146539688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7325706112915555, + "compression_loss": 0.0, + "distillation_loss": 0.10237106680870056, + "epoch": 0.99, + "learning_rate": 4.760677452142247e-05, + "loss": 0.098, + "step": 1038, + "task_loss": 0.05824420601129532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.732597991930924, + "compression_loss": 0.0, + "distillation_loss": 0.28087666630744934, + "epoch": 0.99, + "learning_rate": 4.760222311511152e-05, + "loss": 0.273, + "step": 1039, + "task_loss": 0.20193740725517273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7326253651570965, + "compression_loss": 0.0, + "distillation_loss": 0.31947529315948486, + "epoch": 0.99, + "learning_rate": 4.759766760299788e-05, + "loss": 0.316, + "step": 1040, + "task_loss": 0.2846387028694153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7326527309710767, + "compression_loss": 0.0, + "distillation_loss": 0.08732345700263977, + "epoch": 0.99, + "learning_rate": 4.759310798590909e-05, + "loss": 0.0912, + "step": 1041, + "task_loss": 0.12588536739349365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7326800893738684, + "compression_loss": 0.0, + "distillation_loss": 0.31434834003448486, + "epoch": 0.99, + "learning_rate": 4.758854426467343e-05, + "loss": 0.3049, + "step": 1042, + "task_loss": 0.22009915113449097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7327074403664752, + "compression_loss": 0.0, + "distillation_loss": 0.3221967816352844, + "epoch": 0.99, + "learning_rate": 4.758397644011992e-05, + "loss": 0.3095, + "step": 1043, + "task_loss": 0.19476738572120667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7327347839499008, + "compression_loss": 0.0, + "distillation_loss": 0.2775757908821106, + "epoch": 0.99, + "learning_rate": 4.757940451307831e-05, + "loss": 0.2682, + "step": 1044, + "task_loss": 0.18372797966003418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7327621201251489, + "compression_loss": 0.0, + "distillation_loss": 0.25040361285209656, + "epoch": 0.99, + "learning_rate": 4.757482848437914e-05, + "loss": 0.2413, + "step": 1045, + "task_loss": 0.15979930758476257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.732789448893223, + "compression_loss": 0.0, + "distillation_loss": 0.16249604523181915, + "epoch": 0.99, + "learning_rate": 4.7570248354853644e-05, + "loss": 0.1538, + "step": 1046, + "task_loss": 0.07583259046077728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.732816770255127, + "compression_loss": 0.0, + "distillation_loss": 0.09381724894046783, + "epoch": 0.99, + "learning_rate": 4.7565664125333845e-05, + "loss": 0.0878, + "step": 1047, + "task_loss": 0.033302441239356995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7328440842118646, + "compression_loss": 0.0, + "distillation_loss": 0.2704293131828308, + "epoch": 1.0, + "learning_rate": 4.7561075796652464e-05, + "loss": 0.2806, + "step": 1048, + "task_loss": 0.3726223111152649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7328713907644394, + "compression_loss": 0.0, + "distillation_loss": 0.2771369516849518, + "epoch": 1.0, + "learning_rate": 4.755648336964302e-05, + "loss": 0.2641, + "step": 1049, + "task_loss": 0.14655812084674835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.732898689913855, + "compression_loss": 0.0, + "distillation_loss": 0.11366402357816696, + "epoch": 1.0, + "learning_rate": 4.7551886845139743e-05, + "loss": 0.1051, + "step": 1050, + "task_loss": 0.028415286913514137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7329259816611152, + "compression_loss": 0.0, + "distillation_loss": 0.152854323387146, + "epoch": 1.0, + "learning_rate": 4.754728622397761e-05, + "loss": 0.1433, + "step": 1051, + "task_loss": 0.05741133540868759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.7329532660072237, + "compression_loss": 0.0, + "distillation_loss": 0.18268108367919922, + "epoch": 1.0, + "learning_rate": 4.754268150699234e-05, + "loss": 0.1803, + "step": 1052, + "task_loss": 0.15914951264858246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, + "compression/magnitude_sparsity/target_sparsity_level": 0.732980542953184, + "compression_loss": 0.0, + "distillation_loss": 0.04385395348072052, + "epoch": 1.0, + "learning_rate": 4.753807269502041e-05, + "loss": 0.0402, + "step": 1053, + "task_loss": 0.007449280470609665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7330078125, + "compression_loss": 0.0, + "distillation_loss": 0.29212307929992676, + "epoch": 1.0, + "learning_rate": 4.7533459788899026e-05, + "loss": 0.277, + "step": 1054, + "task_loss": 0.1408318132162094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7330350746486752, + "compression_loss": 0.0, + "distillation_loss": 0.34965944290161133, + "epoch": 1.0, + "learning_rate": 4.752884278946614e-05, + "loss": 0.3271, + "step": 1055, + "task_loss": 0.12405388802289963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7330623294002135, + "compression_loss": 0.0, + "distillation_loss": 0.46271616220474243, + "epoch": 1.0, + "learning_rate": 4.752422169756048e-05, + "loss": 0.4489, + "step": 1056, + "task_loss": 0.3246670067310333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7330895767556185, + "compression_loss": 0.0, + "distillation_loss": 0.26765865087509155, + "epoch": 1.0, + "learning_rate": 4.7519596514021464e-05, + "loss": 0.2574, + "step": 1057, + "task_loss": 0.16510917246341705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7331168167158937, + "compression_loss": 0.0, + "distillation_loss": 0.7084404230117798, + "epoch": 1.0, + "learning_rate": 4.751496723968929e-05, + "loss": 0.6848, + "step": 1058, + "task_loss": 0.4719713628292084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7331440492820429, + "compression_loss": 0.0, + "distillation_loss": 0.19782431423664093, + "epoch": 1.01, + "learning_rate": 4.751033387540488e-05, + "loss": 0.1849, + "step": 1059, + "task_loss": 0.06854899227619171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7331712744550699, + "compression_loss": 0.0, + "distillation_loss": 0.44147199392318726, + "epoch": 1.01, + "learning_rate": 4.7505696422009904e-05, + "loss": 0.4319, + "step": 1060, + "task_loss": 0.34560465812683105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7331984922359782, + "compression_loss": 0.0, + "distillation_loss": 0.5233361124992371, + "epoch": 1.01, + "learning_rate": 4.750105488034679e-05, + "loss": 0.4983, + "step": 1061, + "task_loss": 0.27257034182548523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7332257026257717, + "compression_loss": 0.0, + "distillation_loss": 0.5197453498840332, + "epoch": 1.01, + "learning_rate": 4.749640925125869e-05, + "loss": 0.5011, + "step": 1062, + "task_loss": 0.3330010771751404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7332529056254539, + "compression_loss": 0.0, + "distillation_loss": 0.17582668364048004, + "epoch": 1.01, + "learning_rate": 4.749175953558951e-05, + "loss": 0.1842, + "step": 1063, + "task_loss": 0.2600041329860687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7332801012360284, + "compression_loss": 0.0, + "distillation_loss": 0.4421820640563965, + "epoch": 1.01, + "learning_rate": 4.748710573418388e-05, + "loss": 0.4457, + "step": 1064, + "task_loss": 0.47691667079925537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7333072894584991, + "compression_loss": 0.0, + "distillation_loss": 0.18625092506408691, + "epoch": 1.01, + "learning_rate": 4.7482447847887204e-05, + "loss": 0.1779, + "step": 1065, + "task_loss": 0.1030142530798912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7333344702938697, + "compression_loss": 0.0, + "distillation_loss": 0.19206887483596802, + "epoch": 1.01, + "learning_rate": 4.747778587754559e-05, + "loss": 0.1904, + "step": 1066, + "task_loss": 0.1756243109703064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7333616437431436, + "compression_loss": 0.0, + "distillation_loss": 0.28138798475265503, + "epoch": 1.01, + "learning_rate": 4.7473119824005926e-05, + "loss": 0.2826, + "step": 1067, + "task_loss": 0.29329320788383484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7333888098073248, + "compression_loss": 0.0, + "distillation_loss": 0.14245599508285522, + "epoch": 1.01, + "learning_rate": 4.7468449688115806e-05, + "loss": 0.1334, + "step": 1068, + "task_loss": 0.05212767794728279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7334159684874167, + "compression_loss": 0.0, + "distillation_loss": 0.517392635345459, + "epoch": 1.02, + "learning_rate": 4.74637754707236e-05, + "loss": 0.5164, + "step": 1069, + "task_loss": 0.5071411728858948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7334431197844233, + "compression_loss": 0.0, + "distillation_loss": 0.3079312741756439, + "epoch": 1.02, + "learning_rate": 4.7459097172678386e-05, + "loss": 0.3028, + "step": 1070, + "task_loss": 0.2564443051815033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.733470263699348, + "compression_loss": 0.0, + "distillation_loss": 0.07088293135166168, + "epoch": 1.02, + "learning_rate": 4.745441479483001e-05, + "loss": 0.0657, + "step": 1071, + "task_loss": 0.019033435732126236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7334974002331945, + "compression_loss": 0.0, + "distillation_loss": 0.1880199909210205, + "epoch": 1.02, + "learning_rate": 4.744972833802904e-05, + "loss": 0.1756, + "step": 1072, + "task_loss": 0.06384404003620148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7335245293869668, + "compression_loss": 0.0, + "distillation_loss": 0.36879584193229675, + "epoch": 1.02, + "learning_rate": 4.74450378031268e-05, + "loss": 0.3632, + "step": 1073, + "task_loss": 0.3126252293586731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7335516511616682, + "compression_loss": 0.0, + "distillation_loss": 0.33385515213012695, + "epoch": 1.02, + "learning_rate": 4.744034319097535e-05, + "loss": 0.3191, + "step": 1074, + "task_loss": 0.18633373081684113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7335787655583026, + "compression_loss": 0.0, + "distillation_loss": 0.3007218539714813, + "epoch": 1.02, + "learning_rate": 4.743564450242749e-05, + "loss": 0.2902, + "step": 1075, + "task_loss": 0.19521453976631165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7336058725778736, + "compression_loss": 0.0, + "distillation_loss": 0.2639384865760803, + "epoch": 1.02, + "learning_rate": 4.7430941738336745e-05, + "loss": 0.2496, + "step": 1076, + "task_loss": 0.120790995657444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7336329722213848, + "compression_loss": 0.0, + "distillation_loss": 0.3540310859680176, + "epoch": 1.02, + "learning_rate": 4.742623489955741e-05, + "loss": 0.3531, + "step": 1077, + "task_loss": 0.34429335594177246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7336600644898401, + "compression_loss": 0.0, + "distillation_loss": 0.3709731698036194, + "epoch": 1.02, + "learning_rate": 4.74215239869445e-05, + "loss": 0.3647, + "step": 1078, + "task_loss": 0.30800601840019226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.733687149384243, + "compression_loss": 0.0, + "distillation_loss": 0.22464478015899658, + "epoch": 1.02, + "learning_rate": 4.741680900135377e-05, + "loss": 0.2118, + "step": 1079, + "task_loss": 0.09570850431919098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7337142269055972, + "compression_loss": 0.0, + "distillation_loss": 0.4049076437950134, + "epoch": 1.03, + "learning_rate": 4.741208994364173e-05, + "loss": 0.3865, + "step": 1080, + "task_loss": 0.2211461365222931 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7337412970549065, + "compression_loss": 0.0, + "distillation_loss": 0.1285635232925415, + "epoch": 1.03, + "learning_rate": 4.740736681466561e-05, + "loss": 0.1211, + "step": 1081, + "task_loss": 0.0543033629655838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7337683598331745, + "compression_loss": 0.0, + "distillation_loss": 0.15440712869167328, + "epoch": 1.03, + "learning_rate": 4.74026396152834e-05, + "loss": 0.1622, + "step": 1082, + "task_loss": 0.2323606014251709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7337954152414049, + "compression_loss": 0.0, + "distillation_loss": 0.06974273920059204, + "epoch": 1.03, + "learning_rate": 4.7397908346353796e-05, + "loss": 0.0666, + "step": 1083, + "task_loss": 0.038624271750450134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7338224632806014, + "compression_loss": 0.0, + "distillation_loss": 0.41569557785987854, + "epoch": 1.03, + "learning_rate": 4.739317300873628e-05, + "loss": 0.3924, + "step": 1084, + "task_loss": 0.1831234097480774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7338495039517676, + "compression_loss": 0.0, + "distillation_loss": 0.3054138720035553, + "epoch": 1.03, + "learning_rate": 4.738843360329104e-05, + "loss": 0.2951, + "step": 1085, + "task_loss": 0.20198288559913635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7338765372559072, + "compression_loss": 0.0, + "distillation_loss": 0.26133251190185547, + "epoch": 1.03, + "learning_rate": 4.738369013087902e-05, + "loss": 0.2525, + "step": 1086, + "task_loss": 0.17272129654884338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7339035631940241, + "compression_loss": 0.0, + "distillation_loss": 0.20728163421154022, + "epoch": 1.03, + "learning_rate": 4.7378942592361876e-05, + "loss": 0.1958, + "step": 1087, + "task_loss": 0.09262159466743469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7339305817671216, + "compression_loss": 0.0, + "distillation_loss": 0.11674150079488754, + "epoch": 1.03, + "learning_rate": 4.737419098860204e-05, + "loss": 0.108, + "step": 1088, + "task_loss": 0.029347870498895645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7339575929762037, + "compression_loss": 0.0, + "distillation_loss": 0.3030095100402832, + "epoch": 1.03, + "learning_rate": 4.7369435320462654e-05, + "loss": 0.2916, + "step": 1089, + "task_loss": 0.1885221302509308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7339845968222738, + "compression_loss": 0.0, + "distillation_loss": 0.05839750915765762, + "epoch": 1.04, + "learning_rate": 4.73646755888076e-05, + "loss": 0.054, + "step": 1090, + "task_loss": 0.014137573540210724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7340115933063359, + "compression_loss": 0.0, + "distillation_loss": 0.1552124172449112, + "epoch": 1.04, + "learning_rate": 4.7359911794501526e-05, + "loss": 0.1474, + "step": 1091, + "task_loss": 0.07671723514795303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7340385824293935, + "compression_loss": 0.0, + "distillation_loss": 0.273028165102005, + "epoch": 1.04, + "learning_rate": 4.7355143938409785e-05, + "loss": 0.2641, + "step": 1092, + "task_loss": 0.18373528122901917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7340655641924503, + "compression_loss": 0.0, + "distillation_loss": 0.2551477551460266, + "epoch": 1.04, + "learning_rate": 4.735037202139849e-05, + "loss": 0.2586, + "step": 1093, + "task_loss": 0.28983938694000244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.73409253859651, + "compression_loss": 0.0, + "distillation_loss": 0.15983590483665466, + "epoch": 1.04, + "learning_rate": 4.734559604433447e-05, + "loss": 0.1529, + "step": 1094, + "task_loss": 0.09036150574684143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7341195056425763, + "compression_loss": 0.0, + "distillation_loss": 0.13693635165691376, + "epoch": 1.04, + "learning_rate": 4.734081600808531e-05, + "loss": 0.1312, + "step": 1095, + "task_loss": 0.07925444841384888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7341464653316528, + "compression_loss": 0.0, + "distillation_loss": 0.17353419959545135, + "epoch": 1.04, + "learning_rate": 4.733603191351933e-05, + "loss": 0.1626, + "step": 1096, + "task_loss": 0.06457867473363876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7341734176647433, + "compression_loss": 0.0, + "distillation_loss": 0.15793973207473755, + "epoch": 1.04, + "learning_rate": 4.733124376150558e-05, + "loss": 0.1539, + "step": 1097, + "task_loss": 0.11756959557533264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7342003626428515, + "compression_loss": 0.0, + "distillation_loss": 0.08797385543584824, + "epoch": 1.04, + "learning_rate": 4.7326451552913856e-05, + "loss": 0.0843, + "step": 1098, + "task_loss": 0.05155399069190025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7342273002669808, + "compression_loss": 0.0, + "distillation_loss": 0.1670214980840683, + "epoch": 1.04, + "learning_rate": 4.7321655288614674e-05, + "loss": 0.1685, + "step": 1099, + "task_loss": 0.18175487220287323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7342542305381352, + "compression_loss": 0.0, + "distillation_loss": 0.27932503819465637, + "epoch": 1.04, + "learning_rate": 4.7316854969479314e-05, + "loss": 0.271, + "step": 1100, + "task_loss": 0.1957254558801651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7342811534573183, + "compression_loss": 0.0, + "distillation_loss": 0.3590747117996216, + "epoch": 1.05, + "learning_rate": 4.7312050596379764e-05, + "loss": 0.3598, + "step": 1101, + "task_loss": 0.36656391620635986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7343080690255337, + "compression_loss": 0.0, + "distillation_loss": 0.17354151606559753, + "epoch": 1.05, + "learning_rate": 4.730724217018877e-05, + "loss": 0.1851, + "step": 1102, + "task_loss": 0.2888186275959015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7343349772437852, + "compression_loss": 0.0, + "distillation_loss": 0.17269417643547058, + "epoch": 1.05, + "learning_rate": 4.7302429691779806e-05, + "loss": 0.1714, + "step": 1103, + "task_loss": 0.1596398800611496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7343618781130763, + "compression_loss": 0.0, + "distillation_loss": 0.23135778307914734, + "epoch": 1.05, + "learning_rate": 4.729761316202708e-05, + "loss": 0.2202, + "step": 1104, + "task_loss": 0.11965839564800262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7343887716344109, + "compression_loss": 0.0, + "distillation_loss": 0.24613159894943237, + "epoch": 1.05, + "learning_rate": 4.729279258180553e-05, + "loss": 0.2429, + "step": 1105, + "task_loss": 0.21396657824516296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7344156578087925, + "compression_loss": 0.0, + "distillation_loss": 0.34451723098754883, + "epoch": 1.05, + "learning_rate": 4.7287967951990855e-05, + "loss": 0.3342, + "step": 1106, + "task_loss": 0.2409912496805191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7344425366372249, + "compression_loss": 0.0, + "distillation_loss": 0.15786179900169373, + "epoch": 1.05, + "learning_rate": 4.7283139273459445e-05, + "loss": 0.1565, + "step": 1107, + "task_loss": 0.14464277029037476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7344694081207117, + "compression_loss": 0.0, + "distillation_loss": 0.18467701971530914, + "epoch": 1.05, + "learning_rate": 4.727830654708848e-05, + "loss": 0.1777, + "step": 1108, + "task_loss": 0.11510906368494034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7344962722602567, + "compression_loss": 0.0, + "distillation_loss": 0.22931762039661407, + "epoch": 1.05, + "learning_rate": 4.727346977375584e-05, + "loss": 0.22, + "step": 1109, + "task_loss": 0.13630938529968262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7345231290568635, + "compression_loss": 0.0, + "distillation_loss": 0.35741716623306274, + "epoch": 1.05, + "learning_rate": 4.7268628954340136e-05, + "loss": 0.3397, + "step": 1110, + "task_loss": 0.18016816675662994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7345499785115357, + "compression_loss": 0.0, + "distillation_loss": 0.24529217183589935, + "epoch": 1.06, + "learning_rate": 4.726378408972074e-05, + "loss": 0.238, + "step": 1111, + "task_loss": 0.1728375256061554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7345768206252772, + "compression_loss": 0.0, + "distillation_loss": 0.29263418912887573, + "epoch": 1.06, + "learning_rate": 4.725893518077774e-05, + "loss": 0.2744, + "step": 1112, + "task_loss": 0.11022615432739258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7346036553990914, + "compression_loss": 0.0, + "distillation_loss": 0.12653577327728271, + "epoch": 1.06, + "learning_rate": 4.725408222839197e-05, + "loss": 0.1174, + "step": 1113, + "task_loss": 0.034942492842674255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7346304828339822, + "compression_loss": 0.0, + "distillation_loss": 0.37742096185684204, + "epoch": 1.06, + "learning_rate": 4.724922523344498e-05, + "loss": 0.3659, + "step": 1114, + "task_loss": 0.26208043098449707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7346573029309533, + "compression_loss": 0.0, + "distillation_loss": 0.40198618173599243, + "epoch": 1.06, + "learning_rate": 4.724436419681907e-05, + "loss": 0.3978, + "step": 1115, + "task_loss": 0.3604857623577118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7346841156910082, + "compression_loss": 0.0, + "distillation_loss": 0.24844889342784882, + "epoch": 1.06, + "learning_rate": 4.723949911939728e-05, + "loss": 0.2425, + "step": 1116, + "task_loss": 0.1891540288925171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7347109211151508, + "compression_loss": 0.0, + "distillation_loss": 0.5047147274017334, + "epoch": 1.06, + "learning_rate": 4.723463000206337e-05, + "loss": 0.4934, + "step": 1117, + "task_loss": 0.39201080799102783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7347377192043846, + "compression_loss": 0.0, + "distillation_loss": 0.1238306388258934, + "epoch": 1.06, + "learning_rate": 4.722975684570183e-05, + "loss": 0.1146, + "step": 1118, + "task_loss": 0.03139756619930267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7347645099597133, + "compression_loss": 0.0, + "distillation_loss": 0.23237791657447815, + "epoch": 1.06, + "learning_rate": 4.7224879651197905e-05, + "loss": 0.2398, + "step": 1119, + "task_loss": 0.30690667033195496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7347912933821407, + "compression_loss": 0.0, + "distillation_loss": 0.20782580971717834, + "epoch": 1.06, + "learning_rate": 4.721999841943755e-05, + "loss": 0.2096, + "step": 1120, + "task_loss": 0.22543630003929138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7348180694726704, + "compression_loss": 0.0, + "distillation_loss": 0.18192127346992493, + "epoch": 1.06, + "learning_rate": 4.721511315130747e-05, + "loss": 0.1793, + "step": 1121, + "task_loss": 0.15596555173397064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7348448382323061, + "compression_loss": 0.0, + "distillation_loss": 0.14186906814575195, + "epoch": 1.07, + "learning_rate": 4.7210223847695104e-05, + "loss": 0.1417, + "step": 1122, + "task_loss": 0.14012937247753143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7348715996620514, + "compression_loss": 0.0, + "distillation_loss": 0.33551573753356934, + "epoch": 1.07, + "learning_rate": 4.72053305094886e-05, + "loss": 0.3328, + "step": 1123, + "task_loss": 0.3087965250015259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.73489835376291, + "compression_loss": 0.0, + "distillation_loss": 0.16018086671829224, + "epoch": 1.07, + "learning_rate": 4.720043313757687e-05, + "loss": 0.1579, + "step": 1124, + "task_loss": 0.13755735754966736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7349251005358858, + "compression_loss": 0.0, + "distillation_loss": 0.0789952278137207, + "epoch": 1.07, + "learning_rate": 4.719553173284955e-05, + "loss": 0.0733, + "step": 1125, + "task_loss": 0.021938461810350418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7349518399819822, + "compression_loss": 0.0, + "distillation_loss": 0.1255275011062622, + "epoch": 1.07, + "learning_rate": 4.719062629619699e-05, + "loss": 0.1193, + "step": 1126, + "task_loss": 0.06374667584896088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7349785721022031, + "compression_loss": 0.0, + "distillation_loss": 0.14572109282016754, + "epoch": 1.07, + "learning_rate": 4.71857168285103e-05, + "loss": 0.1458, + "step": 1127, + "task_loss": 0.14671580493450165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.735005296897552, + "compression_loss": 0.0, + "distillation_loss": 0.18587951362133026, + "epoch": 1.07, + "learning_rate": 4.718080333068129e-05, + "loss": 0.186, + "step": 1128, + "task_loss": 0.18705090880393982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7350320143690328, + "compression_loss": 0.0, + "distillation_loss": 0.31489160656929016, + "epoch": 1.07, + "learning_rate": 4.717588580360253e-05, + "loss": 0.2983, + "step": 1129, + "task_loss": 0.14935556054115295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7350587245176489, + "compression_loss": 0.0, + "distillation_loss": 0.2369147539138794, + "epoch": 1.07, + "learning_rate": 4.717096424816731e-05, + "loss": 0.2477, + "step": 1130, + "task_loss": 0.3448143005371094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7350854273444042, + "compression_loss": 0.0, + "distillation_loss": 0.24669213593006134, + "epoch": 1.07, + "learning_rate": 4.716603866526967e-05, + "loss": 0.2424, + "step": 1131, + "task_loss": 0.20330274105072021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7351121228503023, + "compression_loss": 0.0, + "distillation_loss": 0.29487940669059753, + "epoch": 1.08, + "learning_rate": 4.7161109055804356e-05, + "loss": 0.286, + "step": 1132, + "task_loss": 0.20643703639507294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7351388110363469, + "compression_loss": 0.0, + "distillation_loss": 0.35977235436439514, + "epoch": 1.08, + "learning_rate": 4.7156175420666844e-05, + "loss": 0.3461, + "step": 1133, + "task_loss": 0.22308805584907532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7351654919035416, + "compression_loss": 0.0, + "distillation_loss": 0.21201397478580475, + "epoch": 1.08, + "learning_rate": 4.715123776075336e-05, + "loss": 0.2005, + "step": 1134, + "task_loss": 0.09711904078722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7351921654528903, + "compression_loss": 0.0, + "distillation_loss": 0.16644607484340668, + "epoch": 1.08, + "learning_rate": 4.714629607696086e-05, + "loss": 0.1694, + "step": 1135, + "task_loss": 0.19630329310894012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7352188316853965, + "compression_loss": 0.0, + "distillation_loss": 0.06290404498577118, + "epoch": 1.08, + "learning_rate": 4.714135037018702e-05, + "loss": 0.0586, + "step": 1136, + "task_loss": 0.01941380277276039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7352454906020639, + "compression_loss": 0.0, + "distillation_loss": 0.2712949216365814, + "epoch": 1.08, + "learning_rate": 4.713640064133025e-05, + "loss": 0.2624, + "step": 1137, + "task_loss": 0.18186871707439423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7352721422038961, + "compression_loss": 0.0, + "distillation_loss": 0.11187370866537094, + "epoch": 1.08, + "learning_rate": 4.7131446891289694e-05, + "loss": 0.1134, + "step": 1138, + "task_loss": 0.12678144872188568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7352987864918971, + "compression_loss": 0.0, + "distillation_loss": 0.1483362764120102, + "epoch": 1.08, + "learning_rate": 4.712648912096522e-05, + "loss": 0.1437, + "step": 1139, + "task_loss": 0.10216772556304932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7353254234670703, + "compression_loss": 0.0, + "distillation_loss": 0.204280823469162, + "epoch": 1.08, + "learning_rate": 4.712152733125744e-05, + "loss": 0.2035, + "step": 1140, + "task_loss": 0.196111261844635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7353520531304194, + "compression_loss": 0.0, + "distillation_loss": 0.18241867423057556, + "epoch": 1.08, + "learning_rate": 4.711656152306768e-05, + "loss": 0.1709, + "step": 1141, + "task_loss": 0.06716363877058029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7353786754829481, + "compression_loss": 0.0, + "distillation_loss": 0.18135663866996765, + "epoch": 1.08, + "learning_rate": 4.711159169729801e-05, + "loss": 0.1737, + "step": 1142, + "task_loss": 0.10434912145137787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7354052905256602, + "compression_loss": 0.0, + "distillation_loss": 0.16455084085464478, + "epoch": 1.09, + "learning_rate": 4.710661785485121e-05, + "loss": 0.1577, + "step": 1143, + "task_loss": 0.09606322646141052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7354318982595592, + "compression_loss": 0.0, + "distillation_loss": 0.18896543979644775, + "epoch": 1.09, + "learning_rate": 4.710163999663081e-05, + "loss": 0.1757, + "step": 1144, + "task_loss": 0.05640107020735741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7354584986856489, + "compression_loss": 0.0, + "distillation_loss": 0.21004149317741394, + "epoch": 1.09, + "learning_rate": 4.709665812354107e-05, + "loss": 0.2006, + "step": 1145, + "task_loss": 0.11531868577003479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7354850918049332, + "compression_loss": 0.0, + "distillation_loss": 0.06717594712972641, + "epoch": 1.09, + "learning_rate": 4.709167223648695e-05, + "loss": 0.0618, + "step": 1146, + "task_loss": 0.013451127335429192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7355116776184153, + "compression_loss": 0.0, + "distillation_loss": 0.06622839719057083, + "epoch": 1.09, + "learning_rate": 4.7086682336374187e-05, + "loss": 0.0771, + "step": 1147, + "task_loss": 0.17544050514698029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7355382561270991, + "compression_loss": 0.0, + "distillation_loss": 0.42941948771476746, + "epoch": 1.09, + "learning_rate": 4.70816884241092e-05, + "loss": 0.4142, + "step": 1148, + "task_loss": 0.2768740653991699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7355648273319885, + "compression_loss": 0.0, + "distillation_loss": 0.3746296763420105, + "epoch": 1.09, + "learning_rate": 4.7076690500599164e-05, + "loss": 0.3572, + "step": 1149, + "task_loss": 0.20025447010993958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7355913912340868, + "compression_loss": 0.0, + "distillation_loss": 0.1592552214860916, + "epoch": 1.09, + "learning_rate": 4.707168856675198e-05, + "loss": 0.1792, + "step": 1150, + "task_loss": 0.3586212992668152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.735617947834398, + "compression_loss": 0.0, + "distillation_loss": 0.16237607598304749, + "epoch": 1.09, + "learning_rate": 4.7066682623476265e-05, + "loss": 0.1536, + "step": 1151, + "task_loss": 0.07469609379768372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7356444971339255, + "compression_loss": 0.0, + "distillation_loss": 0.18786782026290894, + "epoch": 1.09, + "learning_rate": 4.706167267168138e-05, + "loss": 0.1743, + "step": 1152, + "task_loss": 0.05179518088698387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7356710391336734, + "compression_loss": 0.0, + "distillation_loss": 0.41019895672798157, + "epoch": 1.09, + "learning_rate": 4.70566587122774e-05, + "loss": 0.4003, + "step": 1153, + "task_loss": 0.311697781085968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.735697573834645, + "compression_loss": 0.0, + "distillation_loss": 0.27642178535461426, + "epoch": 1.1, + "learning_rate": 4.7051640746175147e-05, + "loss": 0.2717, + "step": 1154, + "task_loss": 0.22955429553985596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.735724101237844, + "compression_loss": 0.0, + "distillation_loss": 0.41415461897850037, + "epoch": 1.1, + "learning_rate": 4.7046618774286146e-05, + "loss": 0.3986, + "step": 1155, + "task_loss": 0.2587291896343231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7357506213442743, + "compression_loss": 0.0, + "distillation_loss": 0.10836180299520493, + "epoch": 1.1, + "learning_rate": 4.7041592797522664e-05, + "loss": 0.1089, + "step": 1156, + "task_loss": 0.11396181583404541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7357771341549394, + "compression_loss": 0.0, + "distillation_loss": 0.3199521601200104, + "epoch": 1.1, + "learning_rate": 4.7036562816797705e-05, + "loss": 0.3084, + "step": 1157, + "task_loss": 0.2041257917881012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7358036396708431, + "compression_loss": 0.0, + "distillation_loss": 0.26940402388572693, + "epoch": 1.1, + "learning_rate": 4.7031528833024976e-05, + "loss": 0.2594, + "step": 1158, + "task_loss": 0.16965427994728088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7358301378929889, + "compression_loss": 0.0, + "distillation_loss": 0.15760007500648499, + "epoch": 1.1, + "learning_rate": 4.702649084711892e-05, + "loss": 0.167, + "step": 1159, + "task_loss": 0.2519673705101013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7358566288223808, + "compression_loss": 0.0, + "distillation_loss": 0.44500085711479187, + "epoch": 1.1, + "learning_rate": 4.7021448859994735e-05, + "loss": 0.4191, + "step": 1160, + "task_loss": 0.1861766129732132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7358831124600221, + "compression_loss": 0.0, + "distillation_loss": 0.10013365000486374, + "epoch": 1.1, + "learning_rate": 4.70164028725683e-05, + "loss": 0.093, + "step": 1161, + "task_loss": 0.029256567358970642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7359095888069168, + "compression_loss": 0.0, + "distillation_loss": 0.30927249789237976, + "epoch": 1.1, + "learning_rate": 4.7011352885756255e-05, + "loss": 0.2942, + "step": 1162, + "task_loss": 0.15870612859725952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7359360578640685, + "compression_loss": 0.0, + "distillation_loss": 0.13697347044944763, + "epoch": 1.1, + "learning_rate": 4.7006298900475954e-05, + "loss": 0.1302, + "step": 1163, + "task_loss": 0.06875795125961304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7359625196324807, + "compression_loss": 0.0, + "distillation_loss": 0.37151408195495605, + "epoch": 1.11, + "learning_rate": 4.7001240917645465e-05, + "loss": 0.3618, + "step": 1164, + "task_loss": 0.27404022216796875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7359889741131573, + "compression_loss": 0.0, + "distillation_loss": 0.08429434895515442, + "epoch": 1.11, + "learning_rate": 4.699617893818361e-05, + "loss": 0.079, + "step": 1165, + "task_loss": 0.031680069863796234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.736015421307102, + "compression_loss": 0.0, + "distillation_loss": 0.20043613016605377, + "epoch": 1.11, + "learning_rate": 4.699111296300992e-05, + "loss": 0.1903, + "step": 1166, + "task_loss": 0.09880296885967255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7360418612153182, + "compression_loss": 0.0, + "distillation_loss": 0.3577537536621094, + "epoch": 1.11, + "learning_rate": 4.6986042993044645e-05, + "loss": 0.344, + "step": 1167, + "task_loss": 0.220191091299057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7360682938388099, + "compression_loss": 0.0, + "distillation_loss": 0.3422430455684662, + "epoch": 1.11, + "learning_rate": 4.698096902920877e-05, + "loss": 0.3224, + "step": 1168, + "task_loss": 0.1436607986688614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7360947191785806, + "compression_loss": 0.0, + "distillation_loss": 0.20955124497413635, + "epoch": 1.11, + "learning_rate": 4.6975891072424015e-05, + "loss": 0.2027, + "step": 1169, + "task_loss": 0.14070504903793335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7361211372356341, + "compression_loss": 0.0, + "distillation_loss": 0.16709572076797485, + "epoch": 1.11, + "learning_rate": 4.697080912361281e-05, + "loss": 0.1657, + "step": 1170, + "task_loss": 0.15344390273094177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7361475480109739, + "compression_loss": 0.0, + "distillation_loss": 0.1173921525478363, + "epoch": 1.11, + "learning_rate": 4.696572318369831e-05, + "loss": 0.1092, + "step": 1171, + "task_loss": 0.035038650035858154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7361739515056039, + "compression_loss": 0.0, + "distillation_loss": 0.09274931252002716, + "epoch": 1.11, + "learning_rate": 4.696063325360441e-05, + "loss": 0.0872, + "step": 1172, + "task_loss": 0.03688472509384155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7362003477205277, + "compression_loss": 0.0, + "distillation_loss": 0.19270846247673035, + "epoch": 1.11, + "learning_rate": 4.6955539334255716e-05, + "loss": 0.1832, + "step": 1173, + "task_loss": 0.09803463518619537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7362267366567488, + "compression_loss": 0.0, + "distillation_loss": 0.07251520454883575, + "epoch": 1.11, + "learning_rate": 4.6950441426577565e-05, + "loss": 0.0787, + "step": 1174, + "task_loss": 0.13475212454795837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7362531183152712, + "compression_loss": 0.0, + "distillation_loss": 0.3601876199245453, + "epoch": 1.12, + "learning_rate": 4.694533953149601e-05, + "loss": 0.3481, + "step": 1175, + "task_loss": 0.23886962234973907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7362794926970984, + "compression_loss": 0.0, + "distillation_loss": 0.24627402424812317, + "epoch": 1.12, + "learning_rate": 4.694023364993784e-05, + "loss": 0.2416, + "step": 1176, + "task_loss": 0.19923092424869537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.736305859803234, + "compression_loss": 0.0, + "distillation_loss": 0.1757299304008484, + "epoch": 1.12, + "learning_rate": 4.693512378283056e-05, + "loss": 0.1673, + "step": 1177, + "task_loss": 0.09126180410385132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7363322196346819, + "compression_loss": 0.0, + "distillation_loss": 0.127120703458786, + "epoch": 1.12, + "learning_rate": 4.693000993110241e-05, + "loss": 0.1282, + "step": 1178, + "task_loss": 0.13783586025238037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7363585721924456, + "compression_loss": 0.0, + "distillation_loss": 0.02453036420047283, + "epoch": 1.12, + "learning_rate": 4.692489209568234e-05, + "loss": 0.0399, + "step": 1179, + "task_loss": 0.17848730087280273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7363849174775289, + "compression_loss": 0.0, + "distillation_loss": 0.21834155917167664, + "epoch": 1.12, + "learning_rate": 4.691977027750002e-05, + "loss": 0.2075, + "step": 1180, + "task_loss": 0.11009831726551056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7364112554909354, + "compression_loss": 0.0, + "distillation_loss": 0.33225932717323303, + "epoch": 1.12, + "learning_rate": 4.691464447748587e-05, + "loss": 0.3258, + "step": 1181, + "task_loss": 0.2678440511226654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7364375862336688, + "compression_loss": 0.0, + "distillation_loss": 0.08998244255781174, + "epoch": 1.12, + "learning_rate": 4.690951469657101e-05, + "loss": 0.1002, + "step": 1182, + "task_loss": 0.1922500729560852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7364639097067329, + "compression_loss": 0.0, + "distillation_loss": 0.07570269703865051, + "epoch": 1.12, + "learning_rate": 4.690438093568728e-05, + "loss": 0.071, + "step": 1183, + "task_loss": 0.028192538768053055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7364902259111312, + "compression_loss": 0.0, + "distillation_loss": 0.1313639134168625, + "epoch": 1.12, + "learning_rate": 4.689924319576727e-05, + "loss": 0.139, + "step": 1184, + "task_loss": 0.20730452239513397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7365165348478675, + "compression_loss": 0.0, + "distillation_loss": 0.2448119819164276, + "epoch": 1.13, + "learning_rate": 4.689410147774426e-05, + "loss": 0.2476, + "step": 1185, + "task_loss": 0.2722412347793579 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7365428365179455, + "compression_loss": 0.0, + "distillation_loss": 0.35139238834381104, + "epoch": 1.13, + "learning_rate": 4.6888955782552274e-05, + "loss": 0.3343, + "step": 1186, + "task_loss": 0.18060128390789032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7365691309223688, + "compression_loss": 0.0, + "distillation_loss": 0.09512563794851303, + "epoch": 1.13, + "learning_rate": 4.688380611112605e-05, + "loss": 0.0879, + "step": 1187, + "task_loss": 0.022444602102041245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.736595418062141, + "compression_loss": 0.0, + "distillation_loss": 0.23818418383598328, + "epoch": 1.13, + "learning_rate": 4.687865246440106e-05, + "loss": 0.2262, + "step": 1188, + "task_loss": 0.11839476227760315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7366216979382659, + "compression_loss": 0.0, + "distillation_loss": 0.23918560147285461, + "epoch": 1.13, + "learning_rate": 4.687349484331347e-05, + "loss": 0.2253, + "step": 1189, + "task_loss": 0.10081011056900024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7366479705517474, + "compression_loss": 0.0, + "distillation_loss": 0.0738721638917923, + "epoch": 1.13, + "learning_rate": 4.6868333248800204e-05, + "loss": 0.0835, + "step": 1190, + "task_loss": 0.1698300540447235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7366742359035887, + "compression_loss": 0.0, + "distillation_loss": 0.32272762060165405, + "epoch": 1.13, + "learning_rate": 4.686316768179889e-05, + "loss": 0.3285, + "step": 1191, + "task_loss": 0.3799636662006378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7367004939947939, + "compression_loss": 0.0, + "distillation_loss": 0.2319181114435196, + "epoch": 1.13, + "learning_rate": 4.685799814324786e-05, + "loss": 0.2353, + "step": 1192, + "task_loss": 0.26583027839660645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7367267448263665, + "compression_loss": 0.0, + "distillation_loss": 0.17839057743549347, + "epoch": 1.13, + "learning_rate": 4.685282463408621e-05, + "loss": 0.1696, + "step": 1193, + "task_loss": 0.09003418684005737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7367529883993101, + "compression_loss": 0.0, + "distillation_loss": 0.05834461748600006, + "epoch": 1.13, + "learning_rate": 4.6847647155253716e-05, + "loss": 0.054, + "step": 1194, + "task_loss": 0.014876075088977814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7367792247146285, + "compression_loss": 0.0, + "distillation_loss": 0.09124276041984558, + "epoch": 1.13, + "learning_rate": 4.684246570769089e-05, + "loss": 0.0846, + "step": 1195, + "task_loss": 0.02449551224708557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7368054537733254, + "compression_loss": 0.0, + "distillation_loss": 0.21645143628120422, + "epoch": 1.14, + "learning_rate": 4.683728029233898e-05, + "loss": 0.204, + "step": 1196, + "task_loss": 0.09219798445701599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7368316755764045, + "compression_loss": 0.0, + "distillation_loss": 0.062870554625988, + "epoch": 1.14, + "learning_rate": 4.683209091013994e-05, + "loss": 0.0685, + "step": 1197, + "task_loss": 0.11939730495214462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7368578901248695, + "compression_loss": 0.0, + "distillation_loss": 0.04577638953924179, + "epoch": 1.14, + "learning_rate": 4.682689756203643e-05, + "loss": 0.0508, + "step": 1198, + "task_loss": 0.0960756242275238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7368840974197238, + "compression_loss": 0.0, + "distillation_loss": 0.08127100765705109, + "epoch": 1.14, + "learning_rate": 4.682170024897187e-05, + "loss": 0.0913, + "step": 1199, + "task_loss": 0.1819191575050354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7369102974619715, + "compression_loss": 0.0, + "distillation_loss": 0.06385953724384308, + "epoch": 1.14, + "learning_rate": 4.681649897189036e-05, + "loss": 0.0621, + "step": 1200, + "task_loss": 0.04620221257209778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7369364902526159, + "compression_loss": 0.0, + "distillation_loss": 0.3319193720817566, + "epoch": 1.14, + "learning_rate": 4.681129373173674e-05, + "loss": 0.3153, + "step": 1201, + "task_loss": 0.16523145139217377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.736962675792661, + "compression_loss": 0.0, + "distillation_loss": 0.11838692426681519, + "epoch": 1.14, + "learning_rate": 4.6806084529456574e-05, + "loss": 0.1378, + "step": 1202, + "task_loss": 0.3123231530189514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7369888540831102, + "compression_loss": 0.0, + "distillation_loss": 0.27343183755874634, + "epoch": 1.14, + "learning_rate": 4.6800871365996135e-05, + "loss": 0.2706, + "step": 1203, + "task_loss": 0.244797021150589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7370150251249674, + "compression_loss": 0.0, + "distillation_loss": 0.37955009937286377, + "epoch": 1.14, + "learning_rate": 4.679565424230241e-05, + "loss": 0.3633, + "step": 1204, + "task_loss": 0.2169659435749054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7370411889192363, + "compression_loss": 0.0, + "distillation_loss": 0.11844378709793091, + "epoch": 1.14, + "learning_rate": 4.679043315932313e-05, + "loss": 0.1225, + "step": 1205, + "task_loss": 0.15944825112819672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7370673454669204, + "compression_loss": 0.0, + "distillation_loss": 0.20653195679187775, + "epoch": 1.15, + "learning_rate": 4.6785208118006715e-05, + "loss": 0.1989, + "step": 1206, + "task_loss": 0.1306045949459076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7370934947690234, + "compression_loss": 0.0, + "distillation_loss": 0.16965758800506592, + "epoch": 1.15, + "learning_rate": 4.677997911930234e-05, + "loss": 0.1598, + "step": 1207, + "task_loss": 0.07105232030153275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7371196368265492, + "compression_loss": 0.0, + "distillation_loss": 0.3983684182167053, + "epoch": 1.15, + "learning_rate": 4.6774746164159854e-05, + "loss": 0.3717, + "step": 1208, + "task_loss": 0.13173796236515045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7371457716405012, + "compression_loss": 0.0, + "distillation_loss": 0.12735429406166077, + "epoch": 1.15, + "learning_rate": 4.676950925352986e-05, + "loss": 0.1323, + "step": 1209, + "task_loss": 0.17691615223884583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7371718992118833, + "compression_loss": 0.0, + "distillation_loss": 0.05615377053618431, + "epoch": 1.15, + "learning_rate": 4.676426838836367e-05, + "loss": 0.054, + "step": 1210, + "task_loss": 0.03485414758324623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7371980195416992, + "compression_loss": 0.0, + "distillation_loss": 0.10495641827583313, + "epoch": 1.15, + "learning_rate": 4.675902356961331e-05, + "loss": 0.1051, + "step": 1211, + "task_loss": 0.1060037836432457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7372241326309523, + "compression_loss": 0.0, + "distillation_loss": 0.04488696902990341, + "epoch": 1.15, + "learning_rate": 4.675377479823153e-05, + "loss": 0.0409, + "step": 1212, + "task_loss": 0.00522448867559433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7372502384806465, + "compression_loss": 0.0, + "distillation_loss": 0.15110138058662415, + "epoch": 1.15, + "learning_rate": 4.6748522075171784e-05, + "loss": 0.1397, + "step": 1213, + "task_loss": 0.03717401623725891 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7372763370917854, + "compression_loss": 0.0, + "distillation_loss": 0.2912394404411316, + "epoch": 1.15, + "learning_rate": 4.674326540138826e-05, + "loss": 0.2817, + "step": 1214, + "task_loss": 0.19608113169670105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7373024284653729, + "compression_loss": 0.0, + "distillation_loss": 0.21200403571128845, + "epoch": 1.15, + "learning_rate": 4.673800477783587e-05, + "loss": 0.2112, + "step": 1215, + "task_loss": 0.20386581122875214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7373285126024124, + "compression_loss": 0.0, + "distillation_loss": 0.29864805936813354, + "epoch": 1.15, + "learning_rate": 4.6732740205470206e-05, + "loss": 0.2818, + "step": 1216, + "task_loss": 0.13044750690460205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7373545895039076, + "compression_loss": 0.0, + "distillation_loss": 0.12389582395553589, + "epoch": 1.16, + "learning_rate": 4.672747168524762e-05, + "loss": 0.1253, + "step": 1217, + "task_loss": 0.13781008124351501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7373806591708625, + "compression_loss": 0.0, + "distillation_loss": 0.04061355069279671, + "epoch": 1.16, + "learning_rate": 4.672219921812517e-05, + "loss": 0.0385, + "step": 1218, + "task_loss": 0.019226521253585815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7374067216042803, + "compression_loss": 0.0, + "distillation_loss": 0.11448079347610474, + "epoch": 1.16, + "learning_rate": 4.671692280506061e-05, + "loss": 0.128, + "step": 1219, + "task_loss": 0.2497006058692932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7374327768051652, + "compression_loss": 0.0, + "distillation_loss": 0.09137353301048279, + "epoch": 1.16, + "learning_rate": 4.671164244701243e-05, + "loss": 0.0936, + "step": 1220, + "task_loss": 0.11410032212734222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7374588247745204, + "compression_loss": 0.0, + "distillation_loss": 0.11541637778282166, + "epoch": 1.16, + "learning_rate": 4.670635814493984e-05, + "loss": 0.1065, + "step": 1221, + "task_loss": 0.025996150448918343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7374848655133499, + "compression_loss": 0.0, + "distillation_loss": 0.0969296246767044, + "epoch": 1.16, + "learning_rate": 4.6701069899802755e-05, + "loss": 0.097, + "step": 1222, + "task_loss": 0.09771576523780823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7375108990226572, + "compression_loss": 0.0, + "distillation_loss": 0.2986716032028198, + "epoch": 1.16, + "learning_rate": 4.669577771256181e-05, + "loss": 0.2872, + "step": 1223, + "task_loss": 0.18374918401241302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7375369253034462, + "compression_loss": 0.0, + "distillation_loss": 0.06657759100198746, + "epoch": 1.16, + "learning_rate": 4.6690481584178354e-05, + "loss": 0.0615, + "step": 1224, + "task_loss": 0.015912499278783798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7375629443567204, + "compression_loss": 0.0, + "distillation_loss": 0.20490974187850952, + "epoch": 1.16, + "learning_rate": 4.6685181515614454e-05, + "loss": 0.2002, + "step": 1225, + "task_loss": 0.15764397382736206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7375889561834835, + "compression_loss": 0.0, + "distillation_loss": 0.12317511439323425, + "epoch": 1.16, + "learning_rate": 4.6679877507832895e-05, + "loss": 0.1218, + "step": 1226, + "task_loss": 0.10897918045520782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7376149607847392, + "compression_loss": 0.0, + "distillation_loss": 0.0427418127655983, + "epoch": 1.17, + "learning_rate": 4.6674569561797174e-05, + "loss": 0.0424, + "step": 1227, + "task_loss": 0.0389644056558609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7376409581614912, + "compression_loss": 0.0, + "distillation_loss": 0.14768566191196442, + "epoch": 1.17, + "learning_rate": 4.666925767847151e-05, + "loss": 0.1529, + "step": 1228, + "task_loss": 0.20031431317329407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7376669483147432, + "compression_loss": 0.0, + "distillation_loss": 0.09107698500156403, + "epoch": 1.17, + "learning_rate": 4.6663941858820825e-05, + "loss": 0.0864, + "step": 1229, + "task_loss": 0.04394202679395676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7376929312454988, + "compression_loss": 0.0, + "distillation_loss": 0.26698851585388184, + "epoch": 1.17, + "learning_rate": 4.665862210381077e-05, + "loss": 0.2655, + "step": 1230, + "task_loss": 0.2520076036453247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7377189069547618, + "compression_loss": 0.0, + "distillation_loss": 0.12991863489151, + "epoch": 1.17, + "learning_rate": 4.66532984144077e-05, + "loss": 0.1312, + "step": 1231, + "task_loss": 0.14297693967819214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7377448754435358, + "compression_loss": 0.0, + "distillation_loss": 0.22150184214115143, + "epoch": 1.17, + "learning_rate": 4.6647970791578685e-05, + "loss": 0.223, + "step": 1232, + "task_loss": 0.23616032302379608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7377708367128246, + "compression_loss": 0.0, + "distillation_loss": 0.32014334201812744, + "epoch": 1.17, + "learning_rate": 4.664263923629153e-05, + "loss": 0.3188, + "step": 1233, + "task_loss": 0.30650225281715393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7377967907636317, + "compression_loss": 0.0, + "distillation_loss": 0.485608845949173, + "epoch": 1.17, + "learning_rate": 4.663730374951472e-05, + "loss": 0.4772, + "step": 1234, + "task_loss": 0.4013437032699585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7378227375969609, + "compression_loss": 0.0, + "distillation_loss": 0.06522876024246216, + "epoch": 1.17, + "learning_rate": 4.663196433221747e-05, + "loss": 0.0715, + "step": 1235, + "task_loss": 0.12773452699184418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7378486772138159, + "compression_loss": 0.0, + "distillation_loss": 0.2558850646018982, + "epoch": 1.17, + "learning_rate": 4.6626620985369724e-05, + "loss": 0.2443, + "step": 1236, + "task_loss": 0.13986220955848694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7378746096152002, + "compression_loss": 0.0, + "distillation_loss": 0.07191958278417587, + "epoch": 1.17, + "learning_rate": 4.662127370994212e-05, + "loss": 0.076, + "step": 1237, + "task_loss": 0.11259147524833679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7379005348021177, + "compression_loss": 0.0, + "distillation_loss": 0.15925487875938416, + "epoch": 1.18, + "learning_rate": 4.6615922506906016e-05, + "loss": 0.1508, + "step": 1238, + "task_loss": 0.07494504749774933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7379264527755719, + "compression_loss": 0.0, + "distillation_loss": 0.216340109705925, + "epoch": 1.18, + "learning_rate": 4.661056737723349e-05, + "loss": 0.2053, + "step": 1239, + "task_loss": 0.10613324493169785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7379523635365667, + "compression_loss": 0.0, + "distillation_loss": 0.4159829914569855, + "epoch": 1.18, + "learning_rate": 4.660520832189732e-05, + "loss": 0.4001, + "step": 1240, + "task_loss": 0.257106214761734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7379782670861056, + "compression_loss": 0.0, + "distillation_loss": 0.04487886279821396, + "epoch": 1.18, + "learning_rate": 4.6599845341871005e-05, + "loss": 0.0454, + "step": 1241, + "task_loss": 0.050378091633319855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7380041634251924, + "compression_loss": 0.0, + "distillation_loss": 0.13262313604354858, + "epoch": 1.18, + "learning_rate": 4.6594478438128757e-05, + "loss": 0.1244, + "step": 1242, + "task_loss": 0.05053956061601639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7380300525548307, + "compression_loss": 0.0, + "distillation_loss": 0.06497453153133392, + "epoch": 1.18, + "learning_rate": 4.6589107611645497e-05, + "loss": 0.0752, + "step": 1243, + "task_loss": 0.16748173534870148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7380559344760241, + "compression_loss": 0.0, + "distillation_loss": 0.16354522109031677, + "epoch": 1.18, + "learning_rate": 4.658373286339688e-05, + "loss": 0.1567, + "step": 1244, + "task_loss": 0.09554801136255264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7380818091897765, + "compression_loss": 0.0, + "distillation_loss": 0.11697202920913696, + "epoch": 1.18, + "learning_rate": 4.6578354194359227e-05, + "loss": 0.1177, + "step": 1245, + "task_loss": 0.12439711391925812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7381076766970915, + "compression_loss": 0.0, + "distillation_loss": 0.139469712972641, + "epoch": 1.18, + "learning_rate": 4.657297160550961e-05, + "loss": 0.1305, + "step": 1246, + "task_loss": 0.04955626651644707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7381335369989727, + "compression_loss": 0.0, + "distillation_loss": 0.1701054573059082, + "epoch": 1.18, + "learning_rate": 4.656758509782582e-05, + "loss": 0.1753, + "step": 1247, + "task_loss": 0.22217293083667755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7381593900964238, + "compression_loss": 0.0, + "distillation_loss": 0.22485214471817017, + "epoch": 1.19, + "learning_rate": 4.6562194672286306e-05, + "loss": 0.2262, + "step": 1248, + "task_loss": 0.23852680623531342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7381852359904486, + "compression_loss": 0.0, + "distillation_loss": 0.06678508222103119, + "epoch": 1.19, + "learning_rate": 4.65568003298703e-05, + "loss": 0.0617, + "step": 1249, + "task_loss": 0.016354495659470558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7382110746820507, + "compression_loss": 0.0, + "distillation_loss": 0.11139645427465439, + "epoch": 1.19, + "learning_rate": 4.655140207155769e-05, + "loss": 0.1023, + "step": 1250, + "task_loss": 0.020297054201364517 + }, + { + "epoch": 1.19, + "eval_accuracy": 0.8979357798165137, + "eval_loss": 0.4085277318954468, + "eval_runtime": 18.1765, + "eval_samples_per_second": 47.974, + "eval_steps_per_second": 5.997, + "step": 1250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7382369061722337, + "compression_loss": 0.0, + "distillation_loss": 0.11869402974843979, + "epoch": 1.19, + "learning_rate": 4.65459998983291e-05, + "loss": 0.1219, + "step": 1251, + "task_loss": 0.15096434950828552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7382627304620014, + "compression_loss": 0.0, + "distillation_loss": 0.22661426663398743, + "epoch": 1.19, + "learning_rate": 4.6540593811165866e-05, + "loss": 0.2328, + "step": 1252, + "task_loss": 0.28801366686820984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7382885475523575, + "compression_loss": 0.0, + "distillation_loss": 0.03438243269920349, + "epoch": 1.19, + "learning_rate": 4.653518381105002e-05, + "loss": 0.0423, + "step": 1253, + "task_loss": 0.11402395367622375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7383143574443055, + "compression_loss": 0.0, + "distillation_loss": 0.03891141712665558, + "epoch": 1.19, + "learning_rate": 4.6529769898964325e-05, + "loss": 0.0358, + "step": 1254, + "task_loss": 0.007961155846714973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7383401601388494, + "compression_loss": 0.0, + "distillation_loss": 0.036808304488658905, + "epoch": 1.19, + "learning_rate": 4.652435207589224e-05, + "loss": 0.0468, + "step": 1255, + "task_loss": 0.1369296759366989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7383659556369926, + "compression_loss": 0.0, + "distillation_loss": 0.07549430429935455, + "epoch": 1.19, + "learning_rate": 4.651893034281793e-05, + "loss": 0.0702, + "step": 1256, + "task_loss": 0.02279655635356903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7383917439397388, + "compression_loss": 0.0, + "distillation_loss": 0.0869937464594841, + "epoch": 1.19, + "learning_rate": 4.6513504700726293e-05, + "loss": 0.0966, + "step": 1257, + "task_loss": 0.1825573891401291 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7384175250480919, + "compression_loss": 0.0, + "distillation_loss": 0.1446138620376587, + "epoch": 1.19, + "learning_rate": 4.650807515060291e-05, + "loss": 0.1397, + "step": 1258, + "task_loss": 0.09526462852954865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7384432989630553, + "compression_loss": 0.0, + "distillation_loss": 0.15781186521053314, + "epoch": 1.2, + "learning_rate": 4.650264169343411e-05, + "loss": 0.1688, + "step": 1259, + "task_loss": 0.26753732562065125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7384690656856329, + "compression_loss": 0.0, + "distillation_loss": 0.06009237468242645, + "epoch": 1.2, + "learning_rate": 4.6497204330206874e-05, + "loss": 0.0683, + "step": 1260, + "task_loss": 0.14257624745368958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7384948252168283, + "compression_loss": 0.0, + "distillation_loss": 0.1592351347208023, + "epoch": 1.2, + "learning_rate": 4.649176306190895e-05, + "loss": 0.1589, + "step": 1261, + "task_loss": 0.15542340278625488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7385205775576451, + "compression_loss": 0.0, + "distillation_loss": 0.3260449469089508, + "epoch": 1.2, + "learning_rate": 4.648631788952874e-05, + "loss": 0.3149, + "step": 1262, + "task_loss": 0.21453765034675598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7385463227090872, + "compression_loss": 0.0, + "distillation_loss": 0.03898075968027115, + "epoch": 1.2, + "learning_rate": 4.6480868814055424e-05, + "loss": 0.0397, + "step": 1263, + "task_loss": 0.04632849618792534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7385720606721581, + "compression_loss": 0.0, + "distillation_loss": 0.3080974519252777, + "epoch": 1.2, + "learning_rate": 4.647541583647883e-05, + "loss": 0.2963, + "step": 1264, + "task_loss": 0.1906224638223648 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7385977914478615, + "compression_loss": 0.0, + "distillation_loss": 0.1396906077861786, + "epoch": 1.2, + "learning_rate": 4.646995895778952e-05, + "loss": 0.1367, + "step": 1265, + "task_loss": 0.11023291200399399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7386235150372011, + "compression_loss": 0.0, + "distillation_loss": 0.16227009892463684, + "epoch": 1.2, + "learning_rate": 4.646449817897876e-05, + "loss": 0.1593, + "step": 1266, + "task_loss": 0.13288387656211853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7386492314411807, + "compression_loss": 0.0, + "distillation_loss": 0.18255099654197693, + "epoch": 1.2, + "learning_rate": 4.645903350103855e-05, + "loss": 0.1828, + "step": 1267, + "task_loss": 0.18547126650810242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7386749406608037, + "compression_loss": 0.0, + "distillation_loss": 0.13566656410694122, + "epoch": 1.2, + "learning_rate": 4.6453564924961544e-05, + "loss": 0.1234, + "step": 1268, + "task_loss": 0.012727364897727966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7387006426970741, + "compression_loss": 0.0, + "distillation_loss": 0.10819855332374573, + "epoch": 1.21, + "learning_rate": 4.644809245174114e-05, + "loss": 0.1009, + "step": 1269, + "task_loss": 0.034790411591529846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7387263375509955, + "compression_loss": 0.0, + "distillation_loss": 0.2560357451438904, + "epoch": 1.21, + "learning_rate": 4.6442616082371466e-05, + "loss": 0.2434, + "step": 1270, + "task_loss": 0.12986107170581818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7387520252235714, + "compression_loss": 0.0, + "distillation_loss": 0.06887871772050858, + "epoch": 1.21, + "learning_rate": 4.64371358178473e-05, + "loss": 0.0706, + "step": 1271, + "task_loss": 0.08568105101585388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7387777057158056, + "compression_loss": 0.0, + "distillation_loss": 0.20982679724693298, + "epoch": 1.21, + "learning_rate": 4.6431651659164174e-05, + "loss": 0.2139, + "step": 1272, + "task_loss": 0.25017955899238586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7388033790287019, + "compression_loss": 0.0, + "distillation_loss": 0.2947445511817932, + "epoch": 1.21, + "learning_rate": 4.6426163607318305e-05, + "loss": 0.2797, + "step": 1273, + "task_loss": 0.14412230253219604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7388290451632638, + "compression_loss": 0.0, + "distillation_loss": 0.10616719722747803, + "epoch": 1.21, + "learning_rate": 4.642067166330663e-05, + "loss": 0.1096, + "step": 1274, + "task_loss": 0.1407913863658905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.738854704120495, + "compression_loss": 0.0, + "distillation_loss": 0.262935608625412, + "epoch": 1.21, + "learning_rate": 4.6415175828126786e-05, + "loss": 0.2522, + "step": 1275, + "task_loss": 0.15595991909503937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7388803559013993, + "compression_loss": 0.0, + "distillation_loss": 0.08681028336286545, + "epoch": 1.21, + "learning_rate": 4.640967610277711e-05, + "loss": 0.0923, + "step": 1276, + "task_loss": 0.14123068749904633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7389060005069803, + "compression_loss": 0.0, + "distillation_loss": 0.2555930018424988, + "epoch": 1.21, + "learning_rate": 4.640417248825667e-05, + "loss": 0.2431, + "step": 1277, + "task_loss": 0.13036002218723297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7389316379382417, + "compression_loss": 0.0, + "distillation_loss": 0.03256315737962723, + "epoch": 1.21, + "learning_rate": 4.63986649855652e-05, + "loss": 0.0301, + "step": 1278, + "task_loss": 0.007671518251299858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7389572681961872, + "compression_loss": 0.0, + "distillation_loss": 0.06931599974632263, + "epoch": 1.21, + "learning_rate": 4.639315359570319e-05, + "loss": 0.0782, + "step": 1279, + "task_loss": 0.1577843278646469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7389828912818205, + "compression_loss": 0.0, + "distillation_loss": 0.09818733483552933, + "epoch": 1.22, + "learning_rate": 4.6387638319671786e-05, + "loss": 0.1179, + "step": 1280, + "task_loss": 0.2952803671360016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7390085071961452, + "compression_loss": 0.0, + "distillation_loss": 0.055818259716033936, + "epoch": 1.22, + "learning_rate": 4.6382119158472895e-05, + "loss": 0.0511, + "step": 1281, + "task_loss": 0.009016238152980804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.739034115940165, + "compression_loss": 0.0, + "distillation_loss": 0.283183753490448, + "epoch": 1.22, + "learning_rate": 4.637659611310907e-05, + "loss": 0.2704, + "step": 1282, + "task_loss": 0.15582668781280518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7390597175148836, + "compression_loss": 0.0, + "distillation_loss": 0.08733191341161728, + "epoch": 1.22, + "learning_rate": 4.637106918458361e-05, + "loss": 0.0902, + "step": 1283, + "task_loss": 0.11557549238204956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7390853119213048, + "compression_loss": 0.0, + "distillation_loss": 0.05915270373225212, + "epoch": 1.22, + "learning_rate": 4.636553837390051e-05, + "loss": 0.0552, + "step": 1284, + "task_loss": 0.01949235051870346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.739110899160432, + "compression_loss": 0.0, + "distillation_loss": 0.22277674078941345, + "epoch": 1.22, + "learning_rate": 4.636000368206447e-05, + "loss": 0.2188, + "step": 1285, + "task_loss": 0.18264567852020264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7391364792332692, + "compression_loss": 0.0, + "distillation_loss": 0.07782046496868134, + "epoch": 1.22, + "learning_rate": 4.6354465110080885e-05, + "loss": 0.0838, + "step": 1286, + "task_loss": 0.1375577300786972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7391620521408199, + "compression_loss": 0.0, + "distillation_loss": 0.1343613713979721, + "epoch": 1.22, + "learning_rate": 4.6348922658955874e-05, + "loss": 0.1423, + "step": 1287, + "task_loss": 0.2140950858592987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7391876178840878, + "compression_loss": 0.0, + "distillation_loss": 0.24139809608459473, + "epoch": 1.22, + "learning_rate": 4.634337632969624e-05, + "loss": 0.2376, + "step": 1288, + "task_loss": 0.20292049646377563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7392131764640766, + "compression_loss": 0.0, + "distillation_loss": 0.1293402463197708, + "epoch": 1.22, + "learning_rate": 4.6337826123309505e-05, + "loss": 0.123, + "step": 1289, + "task_loss": 0.06586703658103943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.73923872788179, + "compression_loss": 0.0, + "distillation_loss": 0.09735876321792603, + "epoch": 1.23, + "learning_rate": 4.6332272040803895e-05, + "loss": 0.1097, + "step": 1290, + "task_loss": 0.22072833776474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7392642721382316, + "compression_loss": 0.0, + "distillation_loss": 0.30776453018188477, + "epoch": 1.23, + "learning_rate": 4.632671408318833e-05, + "loss": 0.2998, + "step": 1291, + "task_loss": 0.22781196236610413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7392898092344052, + "compression_loss": 0.0, + "distillation_loss": 0.13507933914661407, + "epoch": 1.23, + "learning_rate": 4.6321152251472435e-05, + "loss": 0.1333, + "step": 1292, + "task_loss": 0.11699161678552628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7393153391713145, + "compression_loss": 0.0, + "distillation_loss": 0.30968376994132996, + "epoch": 1.23, + "learning_rate": 4.6315586546666556e-05, + "loss": 0.3044, + "step": 1293, + "task_loss": 0.2563600540161133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7393408619499631, + "compression_loss": 0.0, + "distillation_loss": 0.15666070580482483, + "epoch": 1.23, + "learning_rate": 4.631001696978172e-05, + "loss": 0.1524, + "step": 1294, + "task_loss": 0.11410398036241531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7393663775713546, + "compression_loss": 0.0, + "distillation_loss": 0.21059119701385498, + "epoch": 1.23, + "learning_rate": 4.630444352182968e-05, + "loss": 0.1988, + "step": 1295, + "task_loss": 0.09313502162694931 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7393918860364929, + "compression_loss": 0.0, + "distillation_loss": 0.2058752477169037, + "epoch": 1.23, + "learning_rate": 4.6298866203822865e-05, + "loss": 0.1976, + "step": 1296, + "task_loss": 0.12271516025066376 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7394173873463814, + "compression_loss": 0.0, + "distillation_loss": 0.03244621679186821, + "epoch": 1.23, + "learning_rate": 4.629328501677442e-05, + "loss": 0.0299, + "step": 1297, + "task_loss": 0.007005665451288223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7394428815020241, + "compression_loss": 0.0, + "distillation_loss": 0.05955211818218231, + "epoch": 1.23, + "learning_rate": 4.6287699961698214e-05, + "loss": 0.0587, + "step": 1298, + "task_loss": 0.050543755292892456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7394683685044244, + "compression_loss": 0.0, + "distillation_loss": 0.10339696705341339, + "epoch": 1.23, + "learning_rate": 4.6282111039608784e-05, + "loss": 0.1048, + "step": 1299, + "task_loss": 0.1170087605714798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7394938483545862, + "compression_loss": 0.0, + "distillation_loss": 0.13691368699073792, + "epoch": 1.23, + "learning_rate": 4.6276518251521384e-05, + "loss": 0.148, + "step": 1300, + "task_loss": 0.24737101793289185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.739519321053513, + "compression_loss": 0.0, + "distillation_loss": 0.2226058393716812, + "epoch": 1.24, + "learning_rate": 4.6270921598451974e-05, + "loss": 0.2199, + "step": 1301, + "task_loss": 0.19590626657009125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7395447866022086, + "compression_loss": 0.0, + "distillation_loss": 0.25497111678123474, + "epoch": 1.24, + "learning_rate": 4.6265321081417223e-05, + "loss": 0.2433, + "step": 1302, + "task_loss": 0.13829627633094788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7395702450016767, + "compression_loss": 0.0, + "distillation_loss": 0.20901359617710114, + "epoch": 1.24, + "learning_rate": 4.625971670143447e-05, + "loss": 0.2123, + "step": 1303, + "task_loss": 0.242196723818779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7395956962529209, + "compression_loss": 0.0, + "distillation_loss": 0.08364978432655334, + "epoch": 1.24, + "learning_rate": 4.625410845952181e-05, + "loss": 0.0823, + "step": 1304, + "task_loss": 0.07054957747459412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7396211403569448, + "compression_loss": 0.0, + "distillation_loss": 0.18630865216255188, + "epoch": 1.24, + "learning_rate": 4.6248496356697966e-05, + "loss": 0.1769, + "step": 1305, + "task_loss": 0.09224953502416611 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7396465773147524, + "compression_loss": 0.0, + "distillation_loss": 0.2079707384109497, + "epoch": 1.24, + "learning_rate": 4.6242880393982436e-05, + "loss": 0.1995, + "step": 1306, + "task_loss": 0.12356877326965332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.739672007127347, + "compression_loss": 0.0, + "distillation_loss": 0.2150033861398697, + "epoch": 1.24, + "learning_rate": 4.623726057239537e-05, + "loss": 0.2303, + "step": 1307, + "task_loss": 0.3683556020259857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7396974297957326, + "compression_loss": 0.0, + "distillation_loss": 0.21252375841140747, + "epoch": 1.24, + "learning_rate": 4.623163689295764e-05, + "loss": 0.2039, + "step": 1308, + "task_loss": 0.12652674317359924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7397228453209127, + "compression_loss": 0.0, + "distillation_loss": 0.21421362459659576, + "epoch": 1.24, + "learning_rate": 4.6226009356690825e-05, + "loss": 0.2026, + "step": 1309, + "task_loss": 0.09814447164535522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7397482537038911, + "compression_loss": 0.0, + "distillation_loss": 0.20858995616436005, + "epoch": 1.24, + "learning_rate": 4.622037796461718e-05, + "loss": 0.198, + "step": 1310, + "task_loss": 0.10299951583147049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7397736549456713, + "compression_loss": 0.0, + "distillation_loss": 0.2604760527610779, + "epoch": 1.25, + "learning_rate": 4.621474271775968e-05, + "loss": 0.2562, + "step": 1311, + "task_loss": 0.21733561158180237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.739799049047257, + "compression_loss": 0.0, + "distillation_loss": 0.07183189690113068, + "epoch": 1.25, + "learning_rate": 4.620910361714199e-05, + "loss": 0.068, + "step": 1312, + "task_loss": 0.03307514637708664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7398244360096521, + "compression_loss": 0.0, + "distillation_loss": 0.19191870093345642, + "epoch": 1.25, + "learning_rate": 4.620346066378849e-05, + "loss": 0.1822, + "step": 1313, + "task_loss": 0.09521719813346863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7398498158338602, + "compression_loss": 0.0, + "distillation_loss": 0.1862865388393402, + "epoch": 1.25, + "learning_rate": 4.619781385872424e-05, + "loss": 0.1838, + "step": 1314, + "task_loss": 0.16108401119709015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7398751885208849, + "compression_loss": 0.0, + "distillation_loss": 0.28807079792022705, + "epoch": 1.25, + "learning_rate": 4.6192163202975013e-05, + "loss": 0.2742, + "step": 1315, + "task_loss": 0.14911451935768127 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7399005540717298, + "compression_loss": 0.0, + "distillation_loss": 0.07601656764745712, + "epoch": 1.25, + "learning_rate": 4.618650869756728e-05, + "loss": 0.071, + "step": 1316, + "task_loss": 0.025538241490721703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7399259124873989, + "compression_loss": 0.0, + "distillation_loss": 0.08642975986003876, + "epoch": 1.25, + "learning_rate": 4.6180850343528205e-05, + "loss": 0.0799, + "step": 1317, + "task_loss": 0.021204736083745956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7399512637688955, + "compression_loss": 0.0, + "distillation_loss": 0.10312186181545258, + "epoch": 1.25, + "learning_rate": 4.617518814188565e-05, + "loss": 0.0966, + "step": 1318, + "task_loss": 0.037991687655448914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7399766079172236, + "compression_loss": 0.0, + "distillation_loss": 0.2704732418060303, + "epoch": 1.25, + "learning_rate": 4.6169522093668196e-05, + "loss": 0.2797, + "step": 1319, + "task_loss": 0.3626573085784912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7400019449333867, + "compression_loss": 0.0, + "distillation_loss": 0.10321257263422012, + "epoch": 1.25, + "learning_rate": 4.61638521999051e-05, + "loss": 0.0959, + "step": 1320, + "task_loss": 0.030156267806887627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7400272748183885, + "compression_loss": 0.0, + "distillation_loss": 0.5204591751098633, + "epoch": 1.25, + "learning_rate": 4.6158178461626323e-05, + "loss": 0.5081, + "step": 1321, + "task_loss": 0.3965243101119995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7400525975732327, + "compression_loss": 0.0, + "distillation_loss": 0.23917892575263977, + "epoch": 1.26, + "learning_rate": 4.615250087986254e-05, + "loss": 0.2394, + "step": 1322, + "task_loss": 0.24161620438098907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.740077913198923, + "compression_loss": 0.0, + "distillation_loss": 0.09203709661960602, + "epoch": 1.26, + "learning_rate": 4.6146819455645086e-05, + "loss": 0.0865, + "step": 1323, + "task_loss": 0.037142425775527954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7401032216964631, + "compression_loss": 0.0, + "distillation_loss": 0.10284596681594849, + "epoch": 1.26, + "learning_rate": 4.614113419000604e-05, + "loss": 0.1036, + "step": 1324, + "task_loss": 0.11075641214847565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7401285230668567, + "compression_loss": 0.0, + "distillation_loss": 0.2411002814769745, + "epoch": 1.26, + "learning_rate": 4.613544508397815e-05, + "loss": 0.2294, + "step": 1325, + "task_loss": 0.12409268319606781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7401538173111074, + "compression_loss": 0.0, + "distillation_loss": 0.08729246258735657, + "epoch": 1.26, + "learning_rate": 4.6129752138594874e-05, + "loss": 0.0873, + "step": 1326, + "task_loss": 0.0873517096042633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.740179104430219, + "compression_loss": 0.0, + "distillation_loss": 0.13888351619243622, + "epoch": 1.26, + "learning_rate": 4.612405535489036e-05, + "loss": 0.1341, + "step": 1327, + "task_loss": 0.09130087494850159 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.740204384425195, + "compression_loss": 0.0, + "distillation_loss": 0.1486589014530182, + "epoch": 1.26, + "learning_rate": 4.611835473389945e-05, + "loss": 0.148, + "step": 1328, + "task_loss": 0.14163073897361755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7402296572970392, + "compression_loss": 0.0, + "distillation_loss": 0.352372944355011, + "epoch": 1.26, + "learning_rate": 4.61126502766577e-05, + "loss": 0.3364, + "step": 1329, + "task_loss": 0.192458838224411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7402549230467552, + "compression_loss": 0.0, + "distillation_loss": 0.09347756952047348, + "epoch": 1.26, + "learning_rate": 4.6106941984201344e-05, + "loss": 0.1093, + "step": 1330, + "task_loss": 0.251775860786438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7402801816753469, + "compression_loss": 0.0, + "distillation_loss": 0.16438814997673035, + "epoch": 1.26, + "learning_rate": 4.610122985756733e-05, + "loss": 0.158, + "step": 1331, + "task_loss": 0.10030940920114517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7403054331838178, + "compression_loss": 0.0, + "distillation_loss": 0.186666801571846, + "epoch": 1.26, + "learning_rate": 4.609551389779328e-05, + "loss": 0.1849, + "step": 1332, + "task_loss": 0.16852930188179016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7403306775731715, + "compression_loss": 0.0, + "distillation_loss": 0.0614393912255764, + "epoch": 1.27, + "learning_rate": 4.6089794105917544e-05, + "loss": 0.063, + "step": 1333, + "task_loss": 0.0771103948354721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7403559148444119, + "compression_loss": 0.0, + "distillation_loss": 0.28277117013931274, + "epoch": 1.27, + "learning_rate": 4.6084070482979135e-05, + "loss": 0.2787, + "step": 1334, + "task_loss": 0.24248361587524414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7403811449985426, + "compression_loss": 0.0, + "distillation_loss": 0.24766165018081665, + "epoch": 1.27, + "learning_rate": 4.607834303001778e-05, + "loss": 0.2406, + "step": 1335, + "task_loss": 0.17734551429748535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7404063680365671, + "compression_loss": 0.0, + "distillation_loss": 0.21882733702659607, + "epoch": 1.27, + "learning_rate": 4.60726117480739e-05, + "loss": 0.2158, + "step": 1336, + "task_loss": 0.18877999484539032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7404315839594894, + "compression_loss": 0.0, + "distillation_loss": 0.1295139193534851, + "epoch": 1.27, + "learning_rate": 4.6066876638188604e-05, + "loss": 0.1317, + "step": 1337, + "task_loss": 0.151097372174263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.740456792768313, + "compression_loss": 0.0, + "distillation_loss": 0.199735626578331, + "epoch": 1.27, + "learning_rate": 4.606113770140371e-05, + "loss": 0.1898, + "step": 1338, + "task_loss": 0.10075341165065765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7404819944640416, + "compression_loss": 0.0, + "distillation_loss": 0.2485513985157013, + "epoch": 1.27, + "learning_rate": 4.605539493876173e-05, + "loss": 0.2438, + "step": 1339, + "task_loss": 0.20112858712673187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7405071890476789, + "compression_loss": 0.0, + "distillation_loss": 0.2236090898513794, + "epoch": 1.27, + "learning_rate": 4.604964835130585e-05, + "loss": 0.2099, + "step": 1340, + "task_loss": 0.08700668066740036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7405323765202285, + "compression_loss": 0.0, + "distillation_loss": 0.062271565198898315, + "epoch": 1.27, + "learning_rate": 4.6043897940079964e-05, + "loss": 0.0583, + "step": 1341, + "task_loss": 0.022575678303837776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7405575568826942, + "compression_loss": 0.0, + "distillation_loss": 0.08404142409563065, + "epoch": 1.27, + "learning_rate": 4.603814370612867e-05, + "loss": 0.078, + "step": 1342, + "task_loss": 0.02370348386466503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7405827301360796, + "compression_loss": 0.0, + "distillation_loss": 0.2024337202310562, + "epoch": 1.28, + "learning_rate": 4.603238565049726e-05, + "loss": 0.1899, + "step": 1343, + "task_loss": 0.07661712914705276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7406078962813885, + "compression_loss": 0.0, + "distillation_loss": 0.29442059993743896, + "epoch": 1.28, + "learning_rate": 4.60266237742317e-05, + "loss": 0.279, + "step": 1344, + "task_loss": 0.1398637890815735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7406330553196244, + "compression_loss": 0.0, + "distillation_loss": 0.03961509093642235, + "epoch": 1.28, + "learning_rate": 4.602085807837866e-05, + "loss": 0.0527, + "step": 1345, + "task_loss": 0.1707422137260437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7406582072517911, + "compression_loss": 0.0, + "distillation_loss": 0.3172229826450348, + "epoch": 1.28, + "learning_rate": 4.601508856398552e-05, + "loss": 0.3057, + "step": 1346, + "task_loss": 0.20200252532958984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7406833520788924, + "compression_loss": 0.0, + "distillation_loss": 0.1311604082584381, + "epoch": 1.28, + "learning_rate": 4.6009315232100324e-05, + "loss": 0.1316, + "step": 1347, + "task_loss": 0.13595682382583618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7407084898019316, + "compression_loss": 0.0, + "distillation_loss": 0.08850497007369995, + "epoch": 1.28, + "learning_rate": 4.600353808377184e-05, + "loss": 0.0874, + "step": 1348, + "task_loss": 0.07742930948734283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7407336204219128, + "compression_loss": 0.0, + "distillation_loss": 0.03612298145890236, + "epoch": 1.28, + "learning_rate": 4.599775712004951e-05, + "loss": 0.0333, + "step": 1349, + "task_loss": 0.007720911875367165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7407587439398395, + "compression_loss": 0.0, + "distillation_loss": 0.552012026309967, + "epoch": 1.28, + "learning_rate": 4.599197234198347e-05, + "loss": 0.5405, + "step": 1350, + "task_loss": 0.4371148347854614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7407838603567154, + "compression_loss": 0.0, + "distillation_loss": 0.07328462600708008, + "epoch": 1.28, + "learning_rate": 4.5986183750624555e-05, + "loss": 0.0754, + "step": 1351, + "task_loss": 0.09475519508123398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7408089696735441, + "compression_loss": 0.0, + "distillation_loss": 0.2518464922904968, + "epoch": 1.28, + "learning_rate": 4.5980391347024296e-05, + "loss": 0.2416, + "step": 1352, + "task_loss": 0.14951427280902863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7408340718913294, + "compression_loss": 0.0, + "distillation_loss": 0.28138667345046997, + "epoch": 1.28, + "learning_rate": 4.59745951322349e-05, + "loss": 0.2688, + "step": 1353, + "task_loss": 0.15557241439819336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7408591670110749, + "compression_loss": 0.0, + "distillation_loss": 0.11055044829845428, + "epoch": 1.29, + "learning_rate": 4.596879510730929e-05, + "loss": 0.1117, + "step": 1354, + "task_loss": 0.12174604833126068 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7408842550337844, + "compression_loss": 0.0, + "distillation_loss": 0.0641191229224205, + "epoch": 1.29, + "learning_rate": 4.596299127330106e-05, + "loss": 0.0583, + "step": 1355, + "task_loss": 0.0056402478367090225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7409093359604615, + "compression_loss": 0.0, + "distillation_loss": 0.06709885597229004, + "epoch": 1.29, + "learning_rate": 4.59571836312645e-05, + "loss": 0.0675, + "step": 1356, + "task_loss": 0.07135514914989471 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7409344097921098, + "compression_loss": 0.0, + "distillation_loss": 0.1536739617586136, + "epoch": 1.29, + "learning_rate": 4.595137218225461e-05, + "loss": 0.1479, + "step": 1357, + "task_loss": 0.09615078568458557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7409594765297332, + "compression_loss": 0.0, + "distillation_loss": 0.2070506066083908, + "epoch": 1.29, + "learning_rate": 4.594555692732706e-05, + "loss": 0.2038, + "step": 1358, + "task_loss": 0.1743832379579544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.740984536174335, + "compression_loss": 0.0, + "distillation_loss": 0.24169021844863892, + "epoch": 1.29, + "learning_rate": 4.593973786753821e-05, + "loss": 0.2442, + "step": 1359, + "task_loss": 0.2664361894130707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7410095887269194, + "compression_loss": 0.0, + "distillation_loss": 0.2801350951194763, + "epoch": 1.29, + "learning_rate": 4.593391500394514e-05, + "loss": 0.2794, + "step": 1360, + "task_loss": 0.2726552486419678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7410346341884897, + "compression_loss": 0.0, + "distillation_loss": 0.09238822013139725, + "epoch": 1.29, + "learning_rate": 4.5928088337605586e-05, + "loss": 0.0886, + "step": 1361, + "task_loss": 0.05469810217618942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7410596725600497, + "compression_loss": 0.0, + "distillation_loss": 0.09862814098596573, + "epoch": 1.29, + "learning_rate": 4.5922257869578e-05, + "loss": 0.0927, + "step": 1362, + "task_loss": 0.0397910512983799 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.741084703842603, + "compression_loss": 0.0, + "distillation_loss": 0.18891645967960358, + "epoch": 1.29, + "learning_rate": 4.5916423600921496e-05, + "loss": 0.1851, + "step": 1363, + "task_loss": 0.1504015326499939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7411097280371535, + "compression_loss": 0.0, + "distillation_loss": 0.34903091192245483, + "epoch": 1.3, + "learning_rate": 4.591058553269593e-05, + "loss": 0.3357, + "step": 1364, + "task_loss": 0.21621909737586975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7411347451447046, + "compression_loss": 0.0, + "distillation_loss": 0.0994444340467453, + "epoch": 1.3, + "learning_rate": 4.590474366596178e-05, + "loss": 0.1141, + "step": 1365, + "task_loss": 0.24601247906684875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7411597551662602, + "compression_loss": 0.0, + "distillation_loss": 0.11619491875171661, + "epoch": 1.3, + "learning_rate": 4.589889800178026e-05, + "loss": 0.1112, + "step": 1366, + "task_loss": 0.06617462635040283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7411847581028239, + "compression_loss": 0.0, + "distillation_loss": 0.08728669583797455, + "epoch": 1.3, + "learning_rate": 4.589304854121329e-05, + "loss": 0.0875, + "step": 1367, + "task_loss": 0.08968639373779297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7412097539553995, + "compression_loss": 0.0, + "distillation_loss": 0.13692383468151093, + "epoch": 1.3, + "learning_rate": 4.588719528532342e-05, + "loss": 0.1391, + "step": 1368, + "task_loss": 0.15917925536632538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7412347427249903, + "compression_loss": 0.0, + "distillation_loss": 0.2873116433620453, + "epoch": 1.3, + "learning_rate": 4.588133823517392e-05, + "loss": 0.2825, + "step": 1369, + "task_loss": 0.23926284909248352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7412597244126005, + "compression_loss": 0.0, + "distillation_loss": 0.22201889753341675, + "epoch": 1.3, + "learning_rate": 4.587547739182878e-05, + "loss": 0.2193, + "step": 1370, + "task_loss": 0.19527406990528107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7412846990192334, + "compression_loss": 0.0, + "distillation_loss": 0.15326130390167236, + "epoch": 1.3, + "learning_rate": 4.586961275635263e-05, + "loss": 0.1554, + "step": 1371, + "task_loss": 0.17474180459976196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7413096665458929, + "compression_loss": 0.0, + "distillation_loss": 0.1608862280845642, + "epoch": 1.3, + "learning_rate": 4.586374432981081e-05, + "loss": 0.1622, + "step": 1372, + "task_loss": 0.17383195459842682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7413346269935825, + "compression_loss": 0.0, + "distillation_loss": 0.05019424855709076, + "epoch": 1.3, + "learning_rate": 4.585787211326935e-05, + "loss": 0.0561, + "step": 1373, + "task_loss": 0.1092485561966896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7413595803633061, + "compression_loss": 0.0, + "distillation_loss": 0.45158857107162476, + "epoch": 1.3, + "learning_rate": 4.5851996107794975e-05, + "loss": 0.4364, + "step": 1374, + "task_loss": 0.2999117970466614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7413845266560671, + "compression_loss": 0.0, + "distillation_loss": 0.128067284822464, + "epoch": 1.31, + "learning_rate": 4.584611631445508e-05, + "loss": 0.1328, + "step": 1375, + "task_loss": 0.1756190061569214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7414094658728695, + "compression_loss": 0.0, + "distillation_loss": 0.1457686424255371, + "epoch": 1.31, + "learning_rate": 4.5840232734317754e-05, + "loss": 0.1414, + "step": 1376, + "task_loss": 0.10216192901134491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7414343980147168, + "compression_loss": 0.0, + "distillation_loss": 0.33278051018714905, + "epoch": 1.31, + "learning_rate": 4.583434536845179e-05, + "loss": 0.3246, + "step": 1377, + "task_loss": 0.2506526708602905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7414593230826126, + "compression_loss": 0.0, + "distillation_loss": 0.1251896768808365, + "epoch": 1.31, + "learning_rate": 4.5828454217926654e-05, + "loss": 0.1325, + "step": 1378, + "task_loss": 0.19821739196777344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7414842410775608, + "compression_loss": 0.0, + "distillation_loss": 0.18593353033065796, + "epoch": 1.31, + "learning_rate": 4.5822559283812496e-05, + "loss": 0.1785, + "step": 1379, + "task_loss": 0.11133649945259094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.741509152000565, + "compression_loss": 0.0, + "distillation_loss": 0.058180175721645355, + "epoch": 1.31, + "learning_rate": 4.581666056718016e-05, + "loss": 0.0539, + "step": 1380, + "task_loss": 0.015172762796282768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7415340558526288, + "compression_loss": 0.0, + "distillation_loss": 0.2561149001121521, + "epoch": 1.31, + "learning_rate": 4.5810758069101175e-05, + "loss": 0.2525, + "step": 1381, + "task_loss": 0.2195451259613037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7415589526347559, + "compression_loss": 0.0, + "distillation_loss": 0.3725208044052124, + "epoch": 1.31, + "learning_rate": 4.580485179064777e-05, + "loss": 0.3704, + "step": 1382, + "task_loss": 0.350993275642395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.74158384234795, + "compression_loss": 0.0, + "distillation_loss": 0.03700024634599686, + "epoch": 1.31, + "learning_rate": 4.579894173289284e-05, + "loss": 0.0392, + "step": 1383, + "task_loss": 0.0591222308576107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7416087249932148, + "compression_loss": 0.0, + "distillation_loss": 0.12477545440196991, + "epoch": 1.31, + "learning_rate": 4.579302789690997e-05, + "loss": 0.1158, + "step": 1384, + "task_loss": 0.03471755236387253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7416336005715541, + "compression_loss": 0.0, + "distillation_loss": 0.1321507692337036, + "epoch": 1.32, + "learning_rate": 4.578711028377344e-05, + "loss": 0.1349, + "step": 1385, + "task_loss": 0.1596865952014923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7416584690839714, + "compression_loss": 0.0, + "distillation_loss": 0.09508243948221207, + "epoch": 1.32, + "learning_rate": 4.578118889455821e-05, + "loss": 0.0901, + "step": 1386, + "task_loss": 0.04528198391199112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7416833305314704, + "compression_loss": 0.0, + "distillation_loss": 0.09540075063705444, + "epoch": 1.32, + "learning_rate": 4.577526373033994e-05, + "loss": 0.108, + "step": 1387, + "task_loss": 0.22137771546840668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.741708184915055, + "compression_loss": 0.0, + "distillation_loss": 0.14678335189819336, + "epoch": 1.32, + "learning_rate": 4.576933479219496e-05, + "loss": 0.1414, + "step": 1388, + "task_loss": 0.0929180309176445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7417330322357285, + "compression_loss": 0.0, + "distillation_loss": 0.18357060849666595, + "epoch": 1.32, + "learning_rate": 4.5763402081200294e-05, + "loss": 0.1835, + "step": 1389, + "task_loss": 0.18299201130867004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7417578724944949, + "compression_loss": 0.0, + "distillation_loss": 0.18099266290664673, + "epoch": 1.32, + "learning_rate": 4.575746559843364e-05, + "loss": 0.176, + "step": 1390, + "task_loss": 0.13100393116474152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7417827056923577, + "compression_loss": 0.0, + "distillation_loss": 0.03351439908146858, + "epoch": 1.32, + "learning_rate": 4.5751525344973384e-05, + "loss": 0.0415, + "step": 1391, + "task_loss": 0.11322241276502609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7418075318303208, + "compression_loss": 0.0, + "distillation_loss": 0.06732058525085449, + "epoch": 1.32, + "learning_rate": 4.5745581321898615e-05, + "loss": 0.0621, + "step": 1392, + "task_loss": 0.015335185453295708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7418323509093876, + "compression_loss": 0.0, + "distillation_loss": 0.2339862734079361, + "epoch": 1.32, + "learning_rate": 4.5739633530289085e-05, + "loss": 0.2269, + "step": 1393, + "task_loss": 0.16297489404678345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.741857162930562, + "compression_loss": 0.0, + "distillation_loss": 0.042287249118089676, + "epoch": 1.32, + "learning_rate": 4.573368197122524e-05, + "loss": 0.0504, + "step": 1394, + "task_loss": 0.12305901199579239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7418819678948476, + "compression_loss": 0.0, + "distillation_loss": 0.20534148812294006, + "epoch": 1.32, + "learning_rate": 4.572772664578821e-05, + "loss": 0.1949, + "step": 1395, + "task_loss": 0.10102957487106323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.741906765803248, + "compression_loss": 0.0, + "distillation_loss": 0.056711532175540924, + "epoch": 1.33, + "learning_rate": 4.572176755505981e-05, + "loss": 0.0517, + "step": 1396, + "task_loss": 0.006524372845888138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.741931556656767, + "compression_loss": 0.0, + "distillation_loss": 0.04466833174228668, + "epoch": 1.33, + "learning_rate": 4.571580470012254e-05, + "loss": 0.0415, + "step": 1397, + "task_loss": 0.013324148952960968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7419563404564083, + "compression_loss": 0.0, + "distillation_loss": 0.1416427046060562, + "epoch": 1.33, + "learning_rate": 4.5709838082059574e-05, + "loss": 0.1331, + "step": 1398, + "task_loss": 0.05632413551211357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7419811172031756, + "compression_loss": 0.0, + "distillation_loss": 0.1042378842830658, + "epoch": 1.33, + "learning_rate": 4.570386770195478e-05, + "loss": 0.0981, + "step": 1399, + "task_loss": 0.04313955828547478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7420058868980723, + "compression_loss": 0.0, + "distillation_loss": 0.1780770719051361, + "epoch": 1.33, + "learning_rate": 4.569789356089271e-05, + "loss": 0.177, + "step": 1400, + "task_loss": 0.16733302175998688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7420306495421025, + "compression_loss": 0.0, + "distillation_loss": 0.258390873670578, + "epoch": 1.33, + "learning_rate": 4.569191565995859e-05, + "loss": 0.2603, + "step": 1401, + "task_loss": 0.2771160304546356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7420554051362696, + "compression_loss": 0.0, + "distillation_loss": 0.3139894902706146, + "epoch": 1.33, + "learning_rate": 4.568593400023834e-05, + "loss": 0.3055, + "step": 1402, + "task_loss": 0.22894920408725739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7420801536815774, + "compression_loss": 0.0, + "distillation_loss": 0.17985640466213226, + "epoch": 1.33, + "learning_rate": 4.567994858281855e-05, + "loss": 0.1712, + "step": 1403, + "task_loss": 0.09336966276168823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7421048951790294, + "compression_loss": 0.0, + "distillation_loss": 0.19855880737304688, + "epoch": 1.33, + "learning_rate": 4.56739594087865e-05, + "loss": 0.2139, + "step": 1404, + "task_loss": 0.35179561376571655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7421296296296296, + "compression_loss": 0.0, + "distillation_loss": 0.4204038977622986, + "epoch": 1.33, + "learning_rate": 4.566796647923017e-05, + "loss": 0.4184, + "step": 1405, + "task_loss": 0.4005380868911743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7421543570343815, + "compression_loss": 0.0, + "distillation_loss": 0.18849390745162964, + "epoch": 1.34, + "learning_rate": 4.566196979523818e-05, + "loss": 0.1992, + "step": 1406, + "task_loss": 0.2956838607788086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7421790773942887, + "compression_loss": 0.0, + "distillation_loss": 0.39693641662597656, + "epoch": 1.34, + "learning_rate": 4.5655969357899874e-05, + "loss": 0.3956, + "step": 1407, + "task_loss": 0.38323667645454407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.742203790710355, + "compression_loss": 0.0, + "distillation_loss": 0.21428659558296204, + "epoch": 1.34, + "learning_rate": 4.564996516830525e-05, + "loss": 0.2072, + "step": 1408, + "task_loss": 0.14346663653850555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7422284969835841, + "compression_loss": 0.0, + "distillation_loss": 0.15829120576381683, + "epoch": 1.34, + "learning_rate": 4.564395722754501e-05, + "loss": 0.1601, + "step": 1409, + "task_loss": 0.17646542191505432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7422531962149795, + "compression_loss": 0.0, + "distillation_loss": 0.13824407756328583, + "epoch": 1.34, + "learning_rate": 4.56379455367105e-05, + "loss": 0.129, + "step": 1410, + "task_loss": 0.04564966261386871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7422778884055451, + "compression_loss": 0.0, + "distillation_loss": 0.05707935988903046, + "epoch": 1.34, + "learning_rate": 4.563193009689381e-05, + "loss": 0.0538, + "step": 1411, + "task_loss": 0.023959307000041008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7423025735562846, + "compression_loss": 0.0, + "distillation_loss": 0.42523133754730225, + "epoch": 1.34, + "learning_rate": 4.562591090918764e-05, + "loss": 0.4085, + "step": 1412, + "task_loss": 0.2577897012233734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7423272516682015, + "compression_loss": 0.0, + "distillation_loss": 0.37662801146507263, + "epoch": 1.34, + "learning_rate": 4.561988797468542e-05, + "loss": 0.3691, + "step": 1413, + "task_loss": 0.30150848627090454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7423519227422994, + "compression_loss": 0.0, + "distillation_loss": 0.21535269916057587, + "epoch": 1.34, + "learning_rate": 4.561386129448125e-05, + "loss": 0.205, + "step": 1414, + "task_loss": 0.11162030696868896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7423765867795824, + "compression_loss": 0.0, + "distillation_loss": 0.14912311732769012, + "epoch": 1.34, + "learning_rate": 4.5607830869669885e-05, + "loss": 0.1563, + "step": 1415, + "task_loss": 0.22072558104991913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7424012437810538, + "compression_loss": 0.0, + "distillation_loss": 0.1851930022239685, + "epoch": 1.34, + "learning_rate": 4.560179670134681e-05, + "loss": 0.1752, + "step": 1416, + "task_loss": 0.08566315472126007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7424258937477174, + "compression_loss": 0.0, + "distillation_loss": 0.14120402932167053, + "epoch": 1.35, + "learning_rate": 4.559575879060813e-05, + "loss": 0.1323, + "step": 1417, + "task_loss": 0.05225841701030731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7424505366805769, + "compression_loss": 0.0, + "distillation_loss": 0.29736214876174927, + "epoch": 1.35, + "learning_rate": 4.5589717138550685e-05, + "loss": 0.2882, + "step": 1418, + "task_loss": 0.20604415237903595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.742475172580636, + "compression_loss": 0.0, + "distillation_loss": 0.08903981745243073, + "epoch": 1.35, + "learning_rate": 4.5583671746271964e-05, + "loss": 0.0856, + "step": 1419, + "task_loss": 0.054539553821086884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7424998014488984, + "compression_loss": 0.0, + "distillation_loss": 0.0935673713684082, + "epoch": 1.35, + "learning_rate": 4.557762261487013e-05, + "loss": 0.0914, + "step": 1420, + "task_loss": 0.07140633463859558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7425244232863677, + "compression_loss": 0.0, + "distillation_loss": 0.19990570843219757, + "epoch": 1.35, + "learning_rate": 4.557156974544404e-05, + "loss": 0.1898, + "step": 1421, + "task_loss": 0.09906322509050369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7425490380940476, + "compression_loss": 0.0, + "distillation_loss": 0.24201074242591858, + "epoch": 1.35, + "learning_rate": 4.5565513139093244e-05, + "loss": 0.2452, + "step": 1422, + "task_loss": 0.27348941564559937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7425736458729418, + "compression_loss": 0.0, + "distillation_loss": 0.2360510677099228, + "epoch": 1.35, + "learning_rate": 4.5559452796917936e-05, + "loss": 0.2237, + "step": 1423, + "task_loss": 0.1127673014998436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.742598246624054, + "compression_loss": 0.0, + "distillation_loss": 0.15818031132221222, + "epoch": 1.35, + "learning_rate": 4.555338872001901e-05, + "loss": 0.1643, + "step": 1424, + "task_loss": 0.2189611941576004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7426228403483879, + "compression_loss": 0.0, + "distillation_loss": 0.45690128207206726, + "epoch": 1.35, + "learning_rate": 4.554732090949805e-05, + "loss": 0.4419, + "step": 1425, + "task_loss": 0.30732840299606323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7426474270469471, + "compression_loss": 0.0, + "distillation_loss": 0.15233120322227478, + "epoch": 1.35, + "learning_rate": 4.5541249366457276e-05, + "loss": 0.1699, + "step": 1426, + "task_loss": 0.32829368114471436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7426720067207354, + "compression_loss": 0.0, + "distillation_loss": 0.24415679275989532, + "epoch": 1.36, + "learning_rate": 4.5535174091999636e-05, + "loss": 0.2323, + "step": 1427, + "task_loss": 0.12578970193862915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7426965793707563, + "compression_loss": 0.0, + "distillation_loss": 0.10446126759052277, + "epoch": 1.36, + "learning_rate": 4.552909508722871e-05, + "loss": 0.0981, + "step": 1428, + "task_loss": 0.040688008069992065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7427211449980137, + "compression_loss": 0.0, + "distillation_loss": 0.13874554634094238, + "epoch": 1.36, + "learning_rate": 4.55230123532488e-05, + "loss": 0.1403, + "step": 1429, + "task_loss": 0.15391620993614197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7427457036035111, + "compression_loss": 0.0, + "distillation_loss": 0.04957931116223335, + "epoch": 1.36, + "learning_rate": 4.551692589116486e-05, + "loss": 0.058, + "step": 1430, + "task_loss": 0.13414397835731506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7427702551882523, + "compression_loss": 0.0, + "distillation_loss": 0.18771857023239136, + "epoch": 1.36, + "learning_rate": 4.551083570208252e-05, + "loss": 0.1799, + "step": 1431, + "task_loss": 0.10969773679971695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7427947997532409, + "compression_loss": 0.0, + "distillation_loss": 0.2879871129989624, + "epoch": 1.36, + "learning_rate": 4.550474178710809e-05, + "loss": 0.2745, + "step": 1432, + "task_loss": 0.15289351344108582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7428193372994807, + "compression_loss": 0.0, + "distillation_loss": 0.0784875899553299, + "epoch": 1.36, + "learning_rate": 4.549864414734856e-05, + "loss": 0.0731, + "step": 1433, + "task_loss": 0.02427070587873459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7428438678279753, + "compression_loss": 0.0, + "distillation_loss": 0.08174806833267212, + "epoch": 1.36, + "learning_rate": 4.54925427839116e-05, + "loss": 0.0832, + "step": 1434, + "task_loss": 0.09647224843502045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7428683913397283, + "compression_loss": 0.0, + "distillation_loss": 0.1861201375722885, + "epoch": 1.36, + "learning_rate": 4.548643769790556e-05, + "loss": 0.1766, + "step": 1435, + "task_loss": 0.09077153354883194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7428929078357436, + "compression_loss": 0.0, + "distillation_loss": 0.1611296832561493, + "epoch": 1.36, + "learning_rate": 4.548032889043944e-05, + "loss": 0.169, + "step": 1436, + "task_loss": 0.2397400438785553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7429174173170245, + "compression_loss": 0.0, + "distillation_loss": 0.14555801451206207, + "epoch": 1.36, + "learning_rate": 4.547421636262294e-05, + "loss": 0.1467, + "step": 1437, + "task_loss": 0.1570826917886734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7429419197845752, + "compression_loss": 0.0, + "distillation_loss": 0.23441661894321442, + "epoch": 1.37, + "learning_rate": 4.546810011556644e-05, + "loss": 0.2309, + "step": 1438, + "task_loss": 0.19945275783538818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.742966415239399, + "compression_loss": 0.0, + "distillation_loss": 0.1089128702878952, + "epoch": 1.37, + "learning_rate": 4.546198015038097e-05, + "loss": 0.1146, + "step": 1439, + "task_loss": 0.16534297168254852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7429909036824998, + "compression_loss": 0.0, + "distillation_loss": 0.2072802484035492, + "epoch": 1.37, + "learning_rate": 4.545585646817826e-05, + "loss": 0.1989, + "step": 1440, + "task_loss": 0.12354776263237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7430153851148811, + "compression_loss": 0.0, + "distillation_loss": 0.3501192629337311, + "epoch": 1.37, + "learning_rate": 4.544972907007071e-05, + "loss": 0.335, + "step": 1441, + "task_loss": 0.19924892485141754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7430398595375466, + "compression_loss": 0.0, + "distillation_loss": 0.45086967945098877, + "epoch": 1.37, + "learning_rate": 4.544359795717139e-05, + "loss": 0.4323, + "step": 1442, + "task_loss": 0.265533447265625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7430643269515002, + "compression_loss": 0.0, + "distillation_loss": 0.12026548385620117, + "epoch": 1.37, + "learning_rate": 4.543746313059404e-05, + "loss": 0.1146, + "step": 1443, + "task_loss": 0.063462033867836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7430887873577453, + "compression_loss": 0.0, + "distillation_loss": 0.17728132009506226, + "epoch": 1.37, + "learning_rate": 4.5431324591453094e-05, + "loss": 0.1698, + "step": 1444, + "task_loss": 0.10240338742733002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7431132407572857, + "compression_loss": 0.0, + "distillation_loss": 0.3929315209388733, + "epoch": 1.37, + "learning_rate": 4.5425182340863626e-05, + "loss": 0.3889, + "step": 1445, + "task_loss": 0.3523111641407013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7431376871511252, + "compression_loss": 0.0, + "distillation_loss": 0.14356978237628937, + "epoch": 1.37, + "learning_rate": 4.541903637994142e-05, + "loss": 0.1433, + "step": 1446, + "task_loss": 0.14127568900585175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7431621265402673, + "compression_loss": 0.0, + "distillation_loss": 0.18327973783016205, + "epoch": 1.37, + "learning_rate": 4.541288670980291e-05, + "loss": 0.171, + "step": 1447, + "task_loss": 0.060883235186338425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7431865589257157, + "compression_loss": 0.0, + "distillation_loss": 0.12613250315189362, + "epoch": 1.38, + "learning_rate": 4.540673333156523e-05, + "loss": 0.1266, + "step": 1448, + "task_loss": 0.1305573284626007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7432109843084742, + "compression_loss": 0.0, + "distillation_loss": 0.11743993312120438, + "epoch": 1.38, + "learning_rate": 4.540057624634616e-05, + "loss": 0.11, + "step": 1449, + "task_loss": 0.04330931603908539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7432354026895465, + "compression_loss": 0.0, + "distillation_loss": 0.16623345017433167, + "epoch": 1.38, + "learning_rate": 4.5394415455264164e-05, + "loss": 0.1572, + "step": 1450, + "task_loss": 0.07557562738656998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.743259814069936, + "compression_loss": 0.0, + "distillation_loss": 0.20014873147010803, + "epoch": 1.38, + "learning_rate": 4.538825095943838e-05, + "loss": 0.2043, + "step": 1451, + "task_loss": 0.24151219427585602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7432842184506467, + "compression_loss": 0.0, + "distillation_loss": 0.14636574685573578, + "epoch": 1.38, + "learning_rate": 4.538208275998861e-05, + "loss": 0.1444, + "step": 1452, + "task_loss": 0.12678012251853943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7433086158326822, + "compression_loss": 0.0, + "distillation_loss": 0.12708376348018646, + "epoch": 1.38, + "learning_rate": 4.537591085803535e-05, + "loss": 0.1272, + "step": 1453, + "task_loss": 0.1282857060432434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7433330062170461, + "compression_loss": 0.0, + "distillation_loss": 0.18036913871765137, + "epoch": 1.38, + "learning_rate": 4.5369735254699754e-05, + "loss": 0.1817, + "step": 1454, + "task_loss": 0.19413068890571594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7433573896047422, + "compression_loss": 0.0, + "distillation_loss": 0.13496941328048706, + "epoch": 1.38, + "learning_rate": 4.536355595110365e-05, + "loss": 0.1332, + "step": 1455, + "task_loss": 0.11723415553569794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.743381765996774, + "compression_loss": 0.0, + "distillation_loss": 0.19335393607616425, + "epoch": 1.38, + "learning_rate": 4.5357372948369534e-05, + "loss": 0.1765, + "step": 1456, + "task_loss": 0.024988338351249695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7434061353941454, + "compression_loss": 0.0, + "distillation_loss": 0.19088754057884216, + "epoch": 1.38, + "learning_rate": 4.535118624762057e-05, + "loss": 0.1783, + "step": 1457, + "task_loss": 0.06504514068365097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7434304977978599, + "compression_loss": 0.0, + "distillation_loss": 0.059609562158584595, + "epoch": 1.38, + "learning_rate": 4.534499584998062e-05, + "loss": 0.0554, + "step": 1458, + "task_loss": 0.01706998609006405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7434548532089212, + "compression_loss": 0.0, + "distillation_loss": 0.14300726354122162, + "epoch": 1.39, + "learning_rate": 4.533880175657419e-05, + "loss": 0.1359, + "step": 1459, + "task_loss": 0.07233646512031555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7434792016283331, + "compression_loss": 0.0, + "distillation_loss": 0.07946481555700302, + "epoch": 1.39, + "learning_rate": 4.533260396852646e-05, + "loss": 0.0883, + "step": 1460, + "task_loss": 0.167746901512146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7435035430570993, + "compression_loss": 0.0, + "distillation_loss": 0.16639113426208496, + "epoch": 1.39, + "learning_rate": 4.532640248696331e-05, + "loss": 0.1619, + "step": 1461, + "task_loss": 0.12190359830856323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7435278774962234, + "compression_loss": 0.0, + "distillation_loss": 0.1874895542860031, + "epoch": 1.39, + "learning_rate": 4.532019731301125e-05, + "loss": 0.18, + "step": 1462, + "task_loss": 0.11228100210428238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.743552204946709, + "compression_loss": 0.0, + "distillation_loss": 0.18930573761463165, + "epoch": 1.39, + "learning_rate": 4.531398844779749e-05, + "loss": 0.189, + "step": 1463, + "task_loss": 0.18576987087726593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7435765254095599, + "compression_loss": 0.0, + "distillation_loss": 0.17121566832065582, + "epoch": 1.39, + "learning_rate": 4.530777589244989e-05, + "loss": 0.1633, + "step": 1464, + "task_loss": 0.09190039336681366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7436008388857798, + "compression_loss": 0.0, + "distillation_loss": 0.05851981043815613, + "epoch": 1.39, + "learning_rate": 4.5301559648096995e-05, + "loss": 0.0584, + "step": 1465, + "task_loss": 0.05738149955868721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7436251453763723, + "compression_loss": 0.0, + "distillation_loss": 0.18863308429718018, + "epoch": 1.39, + "learning_rate": 4.529533971586802e-05, + "loss": 0.1878, + "step": 1466, + "task_loss": 0.1802268773317337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7436494448823411, + "compression_loss": 0.0, + "distillation_loss": 0.07734081149101257, + "epoch": 1.39, + "learning_rate": 4.5289116096892834e-05, + "loss": 0.0851, + "step": 1467, + "task_loss": 0.15495401620864868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7436737374046899, + "compression_loss": 0.0, + "distillation_loss": 0.07386526465415955, + "epoch": 1.39, + "learning_rate": 4.5282888792302e-05, + "loss": 0.0687, + "step": 1468, + "task_loss": 0.022300483658909798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7436980229444223, + "compression_loss": 0.0, + "distillation_loss": 0.19114729762077332, + "epoch": 1.4, + "learning_rate": 4.527665780322674e-05, + "loss": 0.2027, + "step": 1469, + "task_loss": 0.3064166009426117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7437223015025421, + "compression_loss": 0.0, + "distillation_loss": 0.3061857223510742, + "epoch": 1.4, + "learning_rate": 4.527042313079893e-05, + "loss": 0.2977, + "step": 1470, + "task_loss": 0.22168110311031342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.743746573080053, + "compression_loss": 0.0, + "distillation_loss": 0.28121721744537354, + "epoch": 1.4, + "learning_rate": 4.526418477615114e-05, + "loss": 0.2646, + "step": 1471, + "task_loss": 0.11492887139320374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7437708376779586, + "compression_loss": 0.0, + "distillation_loss": 0.12805329263210297, + "epoch": 1.4, + "learning_rate": 4.525794274041658e-05, + "loss": 0.1237, + "step": 1472, + "task_loss": 0.08421643078327179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7437950952972626, + "compression_loss": 0.0, + "distillation_loss": 0.21808746457099915, + "epoch": 1.4, + "learning_rate": 4.5251697024729165e-05, + "loss": 0.2269, + "step": 1473, + "task_loss": 0.30643704533576965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7438193459389687, + "compression_loss": 0.0, + "distillation_loss": 0.12836065888404846, + "epoch": 1.4, + "learning_rate": 4.524544763022346e-05, + "loss": 0.1309, + "step": 1474, + "task_loss": 0.15339140594005585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7438435896040806, + "compression_loss": 0.0, + "distillation_loss": 0.25501298904418945, + "epoch": 1.4, + "learning_rate": 4.523919455803468e-05, + "loss": 0.2459, + "step": 1475, + "task_loss": 0.1641104519367218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7438678262936019, + "compression_loss": 0.0, + "distillation_loss": 0.16674241423606873, + "epoch": 1.4, + "learning_rate": 4.5232937809298734e-05, + "loss": 0.1652, + "step": 1476, + "task_loss": 0.1514662802219391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7438920560085364, + "compression_loss": 0.0, + "distillation_loss": 0.18036165833473206, + "epoch": 1.4, + "learning_rate": 4.5226677385152206e-05, + "loss": 0.1784, + "step": 1477, + "task_loss": 0.1612229347229004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7439162787498876, + "compression_loss": 0.0, + "distillation_loss": 0.05593033879995346, + "epoch": 1.4, + "learning_rate": 4.522041328673231e-05, + "loss": 0.0531, + "step": 1478, + "task_loss": 0.027741387486457825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7439404945186594, + "compression_loss": 0.0, + "distillation_loss": 0.07034408301115036, + "epoch": 1.4, + "learning_rate": 4.521414551517695e-05, + "loss": 0.0644, + "step": 1479, + "task_loss": 0.011257486417889595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7439647033158554, + "compression_loss": 0.0, + "distillation_loss": 0.2267724722623825, + "epoch": 1.41, + "learning_rate": 4.520787407162471e-05, + "loss": 0.2152, + "step": 1480, + "task_loss": 0.11058557778596878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7439889051424792, + "compression_loss": 0.0, + "distillation_loss": 0.07127425074577332, + "epoch": 1.41, + "learning_rate": 4.520159895721483e-05, + "loss": 0.0828, + "step": 1481, + "task_loss": 0.18699173629283905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7440130999995346, + "compression_loss": 0.0, + "distillation_loss": 0.1930515021085739, + "epoch": 1.41, + "learning_rate": 4.51953201730872e-05, + "loss": 0.1903, + "step": 1482, + "task_loss": 0.1656961441040039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7440372878880251, + "compression_loss": 0.0, + "distillation_loss": 0.15473680198192596, + "epoch": 1.41, + "learning_rate": 4.51890377203824e-05, + "loss": 0.145, + "step": 1483, + "task_loss": 0.05734732747077942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7440614688089547, + "compression_loss": 0.0, + "distillation_loss": 0.22360102832317352, + "epoch": 1.41, + "learning_rate": 4.518275160024167e-05, + "loss": 0.2159, + "step": 1484, + "task_loss": 0.14627224206924438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7440856427633268, + "compression_loss": 0.0, + "distillation_loss": 0.17431600391864777, + "epoch": 1.41, + "learning_rate": 4.5176461813806904e-05, + "loss": 0.1683, + "step": 1485, + "task_loss": 0.1145024299621582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7441098097521451, + "compression_loss": 0.0, + "distillation_loss": 0.14846046268939972, + "epoch": 1.41, + "learning_rate": 4.5170168362220686e-05, + "loss": 0.1464, + "step": 1486, + "task_loss": 0.12777863442897797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7441339697764136, + "compression_loss": 0.0, + "distillation_loss": 0.11251084506511688, + "epoch": 1.41, + "learning_rate": 4.516387124662624e-05, + "loss": 0.1045, + "step": 1487, + "task_loss": 0.032140232622623444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7441581228371354, + "compression_loss": 0.0, + "distillation_loss": 0.29036539793014526, + "epoch": 1.41, + "learning_rate": 4.5157570468167464e-05, + "loss": 0.2763, + "step": 1488, + "task_loss": 0.14925380051136017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7441822689353148, + "compression_loss": 0.0, + "distillation_loss": 0.06524545699357986, + "epoch": 1.41, + "learning_rate": 4.5151266027988946e-05, + "loss": 0.0624, + "step": 1489, + "task_loss": 0.03709521144628525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.744206408071955, + "compression_loss": 0.0, + "distillation_loss": 0.12188740074634552, + "epoch": 1.42, + "learning_rate": 4.51449579272359e-05, + "loss": 0.1168, + "step": 1490, + "task_loss": 0.07146313041448593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.74423054024806, + "compression_loss": 0.0, + "distillation_loss": 0.035563819110393524, + "epoch": 1.42, + "learning_rate": 4.5138646167054224e-05, + "loss": 0.0455, + "step": 1491, + "task_loss": 0.1353863775730133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7442546654646334, + "compression_loss": 0.0, + "distillation_loss": 0.18486768007278442, + "epoch": 1.42, + "learning_rate": 4.513233074859049e-05, + "loss": 0.1757, + "step": 1492, + "task_loss": 0.09314113855361938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7442787837226787, + "compression_loss": 0.0, + "distillation_loss": 0.11438624560832977, + "epoch": 1.42, + "learning_rate": 4.512601167299191e-05, + "loss": 0.1168, + "step": 1493, + "task_loss": 0.13881582021713257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7443028950231999, + "compression_loss": 0.0, + "distillation_loss": 0.03467051684856415, + "epoch": 1.42, + "learning_rate": 4.511968894140639e-05, + "loss": 0.0348, + "step": 1494, + "task_loss": 0.0362866148352623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7443269993672005, + "compression_loss": 0.0, + "distillation_loss": 0.15380631387233734, + "epoch": 1.42, + "learning_rate": 4.511336255498247e-05, + "loss": 0.1452, + "step": 1495, + "task_loss": 0.06820768862962723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7443510967556841, + "compression_loss": 0.0, + "distillation_loss": 0.057968005537986755, + "epoch": 1.42, + "learning_rate": 4.510703251486937e-05, + "loss": 0.0636, + "step": 1496, + "task_loss": 0.11408950388431549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7443751871896545, + "compression_loss": 0.0, + "distillation_loss": 0.1298018991947174, + "epoch": 1.42, + "learning_rate": 4.5100698822216984e-05, + "loss": 0.1205, + "step": 1497, + "task_loss": 0.03705129399895668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7443992706701154, + "compression_loss": 0.0, + "distillation_loss": 0.056667059659957886, + "epoch": 1.42, + "learning_rate": 4.509436147817585e-05, + "loss": 0.0534, + "step": 1498, + "task_loss": 0.02439264766871929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7444233471980704, + "compression_loss": 0.0, + "distillation_loss": 0.123785100877285, + "epoch": 1.42, + "learning_rate": 4.5088020483897184e-05, + "loss": 0.1167, + "step": 1499, + "task_loss": 0.05303073301911354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7444474167745232, + "compression_loss": 0.0, + "distillation_loss": 0.02330487221479416, + "epoch": 1.42, + "learning_rate": 4.508167584053285e-05, + "loss": 0.0425, + "step": 1500, + "task_loss": 0.21552759408950806 + }, + { + "epoch": 1.42, + "eval_accuracy": 0.911697247706422, + "eval_loss": 0.38651296496391296, + "eval_runtime": 18.7646, + "eval_samples_per_second": 46.471, + "eval_steps_per_second": 5.809, + "step": 1500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7444714794004775, + "compression_loss": 0.0, + "distillation_loss": 0.11092271655797958, + "epoch": 1.43, + "learning_rate": 4.507532754923537e-05, + "loss": 0.1073, + "step": 1501, + "task_loss": 0.07452307641506195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7444955350769371, + "compression_loss": 0.0, + "distillation_loss": 0.06650504469871521, + "epoch": 1.43, + "learning_rate": 4.506897561115797e-05, + "loss": 0.0621, + "step": 1502, + "task_loss": 0.02240423485636711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7445195838049055, + "compression_loss": 0.0, + "distillation_loss": 0.16672199964523315, + "epoch": 1.43, + "learning_rate": 4.506262002745449e-05, + "loss": 0.1695, + "step": 1503, + "task_loss": 0.194443479180336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7445436255853864, + "compression_loss": 0.0, + "distillation_loss": 0.07140068709850311, + "epoch": 1.43, + "learning_rate": 4.505626079927947e-05, + "loss": 0.0655, + "step": 1504, + "task_loss": 0.012316873297095299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7445676604193836, + "compression_loss": 0.0, + "distillation_loss": 0.21132929623126984, + "epoch": 1.43, + "learning_rate": 4.504989792778808e-05, + "loss": 0.2003, + "step": 1505, + "task_loss": 0.10102443397045135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7445916883079007, + "compression_loss": 0.0, + "distillation_loss": 0.1785595715045929, + "epoch": 1.43, + "learning_rate": 4.504353141413616e-05, + "loss": 0.1787, + "step": 1506, + "task_loss": 0.18019554018974304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7446157092519414, + "compression_loss": 0.0, + "distillation_loss": 0.25665420293807983, + "epoch": 1.43, + "learning_rate": 4.5037161259480246e-05, + "loss": 0.2455, + "step": 1507, + "task_loss": 0.14560416340827942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7446397232525094, + "compression_loss": 0.0, + "distillation_loss": 0.04967069625854492, + "epoch": 1.43, + "learning_rate": 4.5030787464977476e-05, + "loss": 0.0577, + "step": 1508, + "task_loss": 0.13003495335578918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7446637303106083, + "compression_loss": 0.0, + "distillation_loss": 0.17093107104301453, + "epoch": 1.43, + "learning_rate": 4.50244100317857e-05, + "loss": 0.171, + "step": 1509, + "task_loss": 0.17199328541755676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7446877304272419, + "compression_loss": 0.0, + "distillation_loss": 0.06911082565784454, + "epoch": 1.43, + "learning_rate": 4.5018028961063394e-05, + "loss": 0.0759, + "step": 1510, + "task_loss": 0.13733558356761932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7447117236034139, + "compression_loss": 0.0, + "distillation_loss": 0.21413826942443848, + "epoch": 1.43, + "learning_rate": 4.501164425396973e-05, + "loss": 0.2076, + "step": 1511, + "task_loss": 0.1490751951932907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7447357098401277, + "compression_loss": 0.0, + "distillation_loss": 0.1935446858406067, + "epoch": 1.44, + "learning_rate": 4.5005255911664507e-05, + "loss": 0.1988, + "step": 1512, + "task_loss": 0.2461317479610443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7447596891383874, + "compression_loss": 0.0, + "distillation_loss": 0.1821810007095337, + "epoch": 1.44, + "learning_rate": 4.49988639353082e-05, + "loss": 0.1794, + "step": 1513, + "task_loss": 0.15411117672920227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7447836614991964, + "compression_loss": 0.0, + "distillation_loss": 0.16811317205429077, + "epoch": 1.44, + "learning_rate": 4.4992468326061944e-05, + "loss": 0.1679, + "step": 1514, + "task_loss": 0.16646404564380646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7448076269235584, + "compression_loss": 0.0, + "distillation_loss": 0.13295818865299225, + "epoch": 1.44, + "learning_rate": 4.498606908508754e-05, + "loss": 0.1242, + "step": 1515, + "task_loss": 0.045377686619758606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7448315854124772, + "compression_loss": 0.0, + "distillation_loss": 0.16021104156970978, + "epoch": 1.44, + "learning_rate": 4.4979666213547414e-05, + "loss": 0.1632, + "step": 1516, + "task_loss": 0.19016344845294952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7448555369669564, + "compression_loss": 0.0, + "distillation_loss": 0.1837073564529419, + "epoch": 1.44, + "learning_rate": 4.497325971260471e-05, + "loss": 0.1772, + "step": 1517, + "task_loss": 0.11903562396764755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7448794815879997, + "compression_loss": 0.0, + "distillation_loss": 0.08035746216773987, + "epoch": 1.44, + "learning_rate": 4.496684958342319e-05, + "loss": 0.076, + "step": 1518, + "task_loss": 0.03652361035346985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7449034192766107, + "compression_loss": 0.0, + "distillation_loss": 0.04287096485495567, + "epoch": 1.44, + "learning_rate": 4.4960435827167266e-05, + "loss": 0.0395, + "step": 1519, + "task_loss": 0.009310789406299591 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7449273500337933, + "compression_loss": 0.0, + "distillation_loss": 0.04646936431527138, + "epoch": 1.44, + "learning_rate": 4.495401844500205e-05, + "loss": 0.0439, + "step": 1520, + "task_loss": 0.02119472809135914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.744951273860551, + "compression_loss": 0.0, + "distillation_loss": 0.20926059782505035, + "epoch": 1.44, + "learning_rate": 4.494759743809329e-05, + "loss": 0.2045, + "step": 1521, + "task_loss": 0.16188855469226837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7449751907578875, + "compression_loss": 0.0, + "distillation_loss": 0.11345314979553223, + "epoch": 1.45, + "learning_rate": 4.494117280760739e-05, + "loss": 0.1181, + "step": 1522, + "task_loss": 0.16004882752895355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7449991007268065, + "compression_loss": 0.0, + "distillation_loss": 0.16868895292282104, + "epoch": 1.45, + "learning_rate": 4.49347445547114e-05, + "loss": 0.1641, + "step": 1523, + "task_loss": 0.12286947667598724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7450230037683118, + "compression_loss": 0.0, + "distillation_loss": 0.17652767896652222, + "epoch": 1.45, + "learning_rate": 4.4928312680573064e-05, + "loss": 0.168, + "step": 1524, + "task_loss": 0.09076812118291855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7450468998834069, + "compression_loss": 0.0, + "distillation_loss": 0.05617145821452141, + "epoch": 1.45, + "learning_rate": 4.492187718636075e-05, + "loss": 0.0604, + "step": 1525, + "task_loss": 0.09820869565010071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7450707890730955, + "compression_loss": 0.0, + "distillation_loss": 0.216825470328331, + "epoch": 1.45, + "learning_rate": 4.49154380732435e-05, + "loss": 0.2122, + "step": 1526, + "task_loss": 0.1709357053041458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7450946713383815, + "compression_loss": 0.0, + "distillation_loss": 0.4693998694419861, + "epoch": 1.45, + "learning_rate": 4.490899534239101e-05, + "loss": 0.4483, + "step": 1527, + "task_loss": 0.2588621973991394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7451185466802683, + "compression_loss": 0.0, + "distillation_loss": 0.2971939742565155, + "epoch": 1.45, + "learning_rate": 4.490254899497364e-05, + "loss": 0.2901, + "step": 1528, + "task_loss": 0.22666522860527039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7451424150997598, + "compression_loss": 0.0, + "distillation_loss": 0.25744807720184326, + "epoch": 1.45, + "learning_rate": 4.4896099032162386e-05, + "loss": 0.2487, + "step": 1529, + "task_loss": 0.17000789940357208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7451662765978594, + "compression_loss": 0.0, + "distillation_loss": 0.13210158050060272, + "epoch": 1.45, + "learning_rate": 4.488964545512892e-05, + "loss": 0.1324, + "step": 1530, + "task_loss": 0.13510762155056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7451901311755711, + "compression_loss": 0.0, + "distillation_loss": 0.2513887882232666, + "epoch": 1.45, + "learning_rate": 4.488318826504557e-05, + "loss": 0.2502, + "step": 1531, + "task_loss": 0.23999468982219696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7452139788338985, + "compression_loss": 0.0, + "distillation_loss": 0.2592649757862091, + "epoch": 1.45, + "learning_rate": 4.4876727463085324e-05, + "loss": 0.2465, + "step": 1532, + "task_loss": 0.13125677406787872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7452378195738452, + "compression_loss": 0.0, + "distillation_loss": 0.26413312554359436, + "epoch": 1.46, + "learning_rate": 4.487026305042179e-05, + "loss": 0.2514, + "step": 1533, + "task_loss": 0.13711029291152954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7452616533964148, + "compression_loss": 0.0, + "distillation_loss": 0.3824172019958496, + "epoch": 1.46, + "learning_rate": 4.4863795028229286e-05, + "loss": 0.3723, + "step": 1534, + "task_loss": 0.281066358089447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7452854803026112, + "compression_loss": 0.0, + "distillation_loss": 0.22528879344463348, + "epoch": 1.46, + "learning_rate": 4.4857323397682746e-05, + "loss": 0.2242, + "step": 1535, + "task_loss": 0.21392083168029785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.745309300293438, + "compression_loss": 0.0, + "distillation_loss": 0.12722215056419373, + "epoch": 1.46, + "learning_rate": 4.485084815995778e-05, + "loss": 0.1359, + "step": 1536, + "task_loss": 0.2140396535396576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7453331133698988, + "compression_loss": 0.0, + "distillation_loss": 0.14468204975128174, + "epoch": 1.46, + "learning_rate": 4.484436931623064e-05, + "loss": 0.1436, + "step": 1537, + "task_loss": 0.1337466537952423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7453569195329973, + "compression_loss": 0.0, + "distillation_loss": 0.41913074254989624, + "epoch": 1.46, + "learning_rate": 4.4837886867678245e-05, + "loss": 0.4061, + "step": 1538, + "task_loss": 0.28871697187423706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7453807187837374, + "compression_loss": 0.0, + "distillation_loss": 0.12353764474391937, + "epoch": 1.46, + "learning_rate": 4.4831400815478164e-05, + "loss": 0.117, + "step": 1539, + "task_loss": 0.05776692181825638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7454045111231224, + "compression_loss": 0.0, + "distillation_loss": 0.07819847762584686, + "epoch": 1.46, + "learning_rate": 4.482491116080861e-05, + "loss": 0.0901, + "step": 1540, + "task_loss": 0.19672957062721252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7454282965521564, + "compression_loss": 0.0, + "distillation_loss": 0.3406957983970642, + "epoch": 1.46, + "learning_rate": 4.4818417904848466e-05, + "loss": 0.3262, + "step": 1541, + "task_loss": 0.19611957669258118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7454520750718426, + "compression_loss": 0.0, + "distillation_loss": 0.1422961801290512, + "epoch": 1.46, + "learning_rate": 4.481192104877726e-05, + "loss": 0.1342, + "step": 1542, + "task_loss": 0.060906361788511276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7454758466831852, + "compression_loss": 0.0, + "distillation_loss": 0.05376805365085602, + "epoch": 1.47, + "learning_rate": 4.480542059377519e-05, + "loss": 0.0542, + "step": 1543, + "task_loss": 0.05844269320368767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7454996113871876, + "compression_loss": 0.0, + "distillation_loss": 0.20722997188568115, + "epoch": 1.47, + "learning_rate": 4.479891654102307e-05, + "loss": 0.2045, + "step": 1544, + "task_loss": 0.1803072690963745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7455233691848534, + "compression_loss": 0.0, + "distillation_loss": 0.2142692506313324, + "epoch": 1.47, + "learning_rate": 4.4792408891702426e-05, + "loss": 0.2035, + "step": 1545, + "task_loss": 0.10616156458854675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7455471200771865, + "compression_loss": 0.0, + "distillation_loss": 0.14973235130310059, + "epoch": 1.47, + "learning_rate": 4.4785897646995376e-05, + "loss": 0.1475, + "step": 1546, + "task_loss": 0.12703794240951538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7455708640651905, + "compression_loss": 0.0, + "distillation_loss": 0.18545937538146973, + "epoch": 1.47, + "learning_rate": 4.477938280808473e-05, + "loss": 0.184, + "step": 1547, + "task_loss": 0.17124545574188232 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.745594601149869, + "compression_loss": 0.0, + "distillation_loss": 0.1463180035352707, + "epoch": 1.47, + "learning_rate": 4.4772864376153936e-05, + "loss": 0.1379, + "step": 1548, + "task_loss": 0.06201765686273575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7456183313322257, + "compression_loss": 0.0, + "distillation_loss": 0.1488766223192215, + "epoch": 1.47, + "learning_rate": 4.4766342352387106e-05, + "loss": 0.1424, + "step": 1549, + "task_loss": 0.08362394571304321 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7456420546132644, + "compression_loss": 0.0, + "distillation_loss": 0.09970419108867645, + "epoch": 1.47, + "learning_rate": 4.475981673796899e-05, + "loss": 0.1015, + "step": 1550, + "task_loss": 0.11792711168527603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7456657709939887, + "compression_loss": 0.0, + "distillation_loss": 0.17068204283714294, + "epoch": 1.47, + "learning_rate": 4.475328753408499e-05, + "loss": 0.1719, + "step": 1551, + "task_loss": 0.18303748965263367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7456894804754023, + "compression_loss": 0.0, + "distillation_loss": 0.05580803379416466, + "epoch": 1.47, + "learning_rate": 4.474675474192119e-05, + "loss": 0.0535, + "step": 1552, + "task_loss": 0.03291773051023483 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7457131830585089, + "compression_loss": 0.0, + "distillation_loss": 0.033489570021629333, + "epoch": 1.47, + "learning_rate": 4.4740218362664276e-05, + "loss": 0.0308, + "step": 1553, + "task_loss": 0.006132926791906357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.745736878744312, + "compression_loss": 0.0, + "distillation_loss": 0.0999932736158371, + "epoch": 1.48, + "learning_rate": 4.473367839750165e-05, + "loss": 0.1088, + "step": 1554, + "task_loss": 0.18756511807441711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7457605675338156, + "compression_loss": 0.0, + "distillation_loss": 0.1821468025445938, + "epoch": 1.48, + "learning_rate": 4.4727134847621276e-05, + "loss": 0.174, + "step": 1555, + "task_loss": 0.1006765216588974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7457842494280232, + "compression_loss": 0.0, + "distillation_loss": 0.15182924270629883, + "epoch": 1.48, + "learning_rate": 4.4720587714211863e-05, + "loss": 0.1564, + "step": 1556, + "task_loss": 0.1978447288274765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7458079244279385, + "compression_loss": 0.0, + "distillation_loss": 0.28013527393341064, + "epoch": 1.48, + "learning_rate": 4.471403699846272e-05, + "loss": 0.2684, + "step": 1557, + "task_loss": 0.16278645396232605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7458315925345651, + "compression_loss": 0.0, + "distillation_loss": 0.023632332682609558, + "epoch": 1.48, + "learning_rate": 4.470748270156381e-05, + "loss": 0.0296, + "step": 1558, + "task_loss": 0.08337298780679703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7458552537489069, + "compression_loss": 0.0, + "distillation_loss": 0.07167325913906097, + "epoch": 1.48, + "learning_rate": 4.4700924824705745e-05, + "loss": 0.0692, + "step": 1559, + "task_loss": 0.047288812696933746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7458789080719673, + "compression_loss": 0.0, + "distillation_loss": 0.053809523582458496, + "epoch": 1.48, + "learning_rate": 4.469436336907982e-05, + "loss": 0.0576, + "step": 1560, + "task_loss": 0.09203386306762695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7459025555047503, + "compression_loss": 0.0, + "distillation_loss": 0.08844821155071259, + "epoch": 1.48, + "learning_rate": 4.4687798335877936e-05, + "loss": 0.088, + "step": 1561, + "task_loss": 0.08441342413425446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7459261960482593, + "compression_loss": 0.0, + "distillation_loss": 0.053808607161045074, + "epoch": 1.48, + "learning_rate": 4.4681229726292664e-05, + "loss": 0.0586, + "step": 1562, + "task_loss": 0.1021692305803299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7459498297034981, + "compression_loss": 0.0, + "distillation_loss": 0.24225428700447083, + "epoch": 1.48, + "learning_rate": 4.4674657541517227e-05, + "loss": 0.2405, + "step": 1563, + "task_loss": 0.22495496273040771 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7459734564714704, + "compression_loss": 0.0, + "distillation_loss": 0.1985190361738205, + "epoch": 1.49, + "learning_rate": 4.466808178274549e-05, + "loss": 0.1868, + "step": 1564, + "task_loss": 0.08100724220275879 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7459970763531799, + "compression_loss": 0.0, + "distillation_loss": 0.2816087603569031, + "epoch": 1.49, + "learning_rate": 4.4661502451171975e-05, + "loss": 0.2677, + "step": 1565, + "task_loss": 0.14259889721870422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7460206893496302, + "compression_loss": 0.0, + "distillation_loss": 0.16295886039733887, + "epoch": 1.49, + "learning_rate": 4.465491954799186e-05, + "loss": 0.1618, + "step": 1566, + "task_loss": 0.1518009454011917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.746044295461825, + "compression_loss": 0.0, + "distillation_loss": 0.17871344089508057, + "epoch": 1.49, + "learning_rate": 4.4648333074400936e-05, + "loss": 0.171, + "step": 1567, + "task_loss": 0.10131989419460297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7460678946907681, + "compression_loss": 0.0, + "distillation_loss": 0.10870079696178436, + "epoch": 1.49, + "learning_rate": 4.464174303159569e-05, + "loss": 0.1071, + "step": 1568, + "task_loss": 0.09272871166467667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.746091487037463, + "compression_loss": 0.0, + "distillation_loss": 0.11727038025856018, + "epoch": 1.49, + "learning_rate": 4.463514942077323e-05, + "loss": 0.1156, + "step": 1569, + "task_loss": 0.10054733604192734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7461150725029135, + "compression_loss": 0.0, + "distillation_loss": 0.03882730007171631, + "epoch": 1.49, + "learning_rate": 4.4628552243131304e-05, + "loss": 0.0442, + "step": 1570, + "task_loss": 0.09213192760944366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7461386510881233, + "compression_loss": 0.0, + "distillation_loss": 0.09602994471788406, + "epoch": 1.49, + "learning_rate": 4.462195149986833e-05, + "loss": 0.09, + "step": 1571, + "task_loss": 0.03555634990334511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7461622227940959, + "compression_loss": 0.0, + "distillation_loss": 0.2854123115539551, + "epoch": 1.49, + "learning_rate": 4.4615347192183375e-05, + "loss": 0.2702, + "step": 1572, + "task_loss": 0.13358475267887115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7461857876218352, + "compression_loss": 0.0, + "distillation_loss": 0.08520927280187607, + "epoch": 1.49, + "learning_rate": 4.4608739321276126e-05, + "loss": 0.0797, + "step": 1573, + "task_loss": 0.03009282425045967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7462093455723449, + "compression_loss": 0.0, + "distillation_loss": 0.074330173432827, + "epoch": 1.49, + "learning_rate": 4.4602127888346944e-05, + "loss": 0.0698, + "step": 1574, + "task_loss": 0.028723739087581635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7462328966466284, + "compression_loss": 0.0, + "distillation_loss": 0.06121968850493431, + "epoch": 1.5, + "learning_rate": 4.459551289459684e-05, + "loss": 0.0707, + "step": 1575, + "task_loss": 0.15602229535579681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7462564408456897, + "compression_loss": 0.0, + "distillation_loss": 0.14412710070610046, + "epoch": 1.5, + "learning_rate": 4.4588894341227426e-05, + "loss": 0.1395, + "step": 1576, + "task_loss": 0.09811600297689438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7462799781705323, + "compression_loss": 0.0, + "distillation_loss": 0.2897692918777466, + "epoch": 1.5, + "learning_rate": 4.4582272229441024e-05, + "loss": 0.2797, + "step": 1577, + "task_loss": 0.1895175725221634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7463035086221598, + "compression_loss": 0.0, + "distillation_loss": 0.06360229849815369, + "epoch": 1.5, + "learning_rate": 4.457564656044056e-05, + "loss": 0.0622, + "step": 1578, + "task_loss": 0.04995894804596901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7463270322015761, + "compression_loss": 0.0, + "distillation_loss": 0.0988512635231018, + "epoch": 1.5, + "learning_rate": 4.456901733542962e-05, + "loss": 0.1002, + "step": 1579, + "task_loss": 0.11196555197238922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7463505489097848, + "compression_loss": 0.0, + "distillation_loss": 0.11553806811571121, + "epoch": 1.5, + "learning_rate": 4.4562384555612436e-05, + "loss": 0.109, + "step": 1580, + "task_loss": 0.050435107201337814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7463740587477896, + "compression_loss": 0.0, + "distillation_loss": 0.4326043128967285, + "epoch": 1.5, + "learning_rate": 4.455574822219388e-05, + "loss": 0.4232, + "step": 1581, + "task_loss": 0.3386293053627014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7463975617165942, + "compression_loss": 0.0, + "distillation_loss": 0.2798699736595154, + "epoch": 1.5, + "learning_rate": 4.454910833637949e-05, + "loss": 0.2735, + "step": 1582, + "task_loss": 0.21614964306354523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7464210578172021, + "compression_loss": 0.0, + "distillation_loss": 0.1274246722459793, + "epoch": 1.5, + "learning_rate": 4.454246489937541e-05, + "loss": 0.1217, + "step": 1583, + "task_loss": 0.07030543684959412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7464445470506172, + "compression_loss": 0.0, + "distillation_loss": 0.07745787501335144, + "epoch": 1.5, + "learning_rate": 4.4535817912388466e-05, + "loss": 0.081, + "step": 1584, + "task_loss": 0.11285798251628876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7464680294178432, + "compression_loss": 0.0, + "distillation_loss": 0.2271667718887329, + "epoch": 1.51, + "learning_rate": 4.4529167376626116e-05, + "loss": 0.2175, + "step": 1585, + "task_loss": 0.13050048053264618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7464915049198835, + "compression_loss": 0.0, + "distillation_loss": 0.3326779007911682, + "epoch": 1.51, + "learning_rate": 4.4522513293296456e-05, + "loss": 0.3292, + "step": 1586, + "task_loss": 0.2977886497974396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7465149735577421, + "compression_loss": 0.0, + "distillation_loss": 0.04014962911605835, + "epoch": 1.51, + "learning_rate": 4.451585566360823e-05, + "loss": 0.052, + "step": 1587, + "task_loss": 0.15873615443706512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7465384353324225, + "compression_loss": 0.0, + "distillation_loss": 0.154592826962471, + "epoch": 1.51, + "learning_rate": 4.450919448877084e-05, + "loss": 0.1459, + "step": 1588, + "task_loss": 0.06723834574222565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7465618902449284, + "compression_loss": 0.0, + "distillation_loss": 0.05010029301047325, + "epoch": 1.51, + "learning_rate": 4.4502529769994314e-05, + "loss": 0.0464, + "step": 1589, + "task_loss": 0.012682372704148293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7465853382962635, + "compression_loss": 0.0, + "distillation_loss": 0.08115085959434509, + "epoch": 1.51, + "learning_rate": 4.449586150848934e-05, + "loss": 0.0893, + "step": 1590, + "task_loss": 0.1630755513906479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7466087794874315, + "compression_loss": 0.0, + "distillation_loss": 0.21416254341602325, + "epoch": 1.51, + "learning_rate": 4.44891897054672e-05, + "loss": 0.2044, + "step": 1591, + "task_loss": 0.11678683757781982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7466322138194361, + "compression_loss": 0.0, + "distillation_loss": 0.17701666057109833, + "epoch": 1.51, + "learning_rate": 4.4482514362139915e-05, + "loss": 0.1665, + "step": 1592, + "task_loss": 0.07226403057575226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.746655641293281, + "compression_loss": 0.0, + "distillation_loss": 0.09402104467153549, + "epoch": 1.51, + "learning_rate": 4.4475835479720065e-05, + "loss": 0.0962, + "step": 1593, + "task_loss": 0.11615651845932007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7466790619099698, + "compression_loss": 0.0, + "distillation_loss": 0.3029029071331024, + "epoch": 1.51, + "learning_rate": 4.4469153059420895e-05, + "loss": 0.2929, + "step": 1594, + "task_loss": 0.20262876152992249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7467024756705063, + "compression_loss": 0.0, + "distillation_loss": 0.17734010517597198, + "epoch": 1.51, + "learning_rate": 4.4462467102456305e-05, + "loss": 0.1858, + "step": 1595, + "task_loss": 0.26218533515930176 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.746725882575894, + "compression_loss": 0.0, + "distillation_loss": 0.24488165974617004, + "epoch": 1.52, + "learning_rate": 4.4455777610040846e-05, + "loss": 0.252, + "step": 1596, + "task_loss": 0.3160625696182251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7467492826271367, + "compression_loss": 0.0, + "distillation_loss": 0.3032427430152893, + "epoch": 1.52, + "learning_rate": 4.444908458338968e-05, + "loss": 0.2889, + "step": 1597, + "task_loss": 0.16029399633407593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7467726758252381, + "compression_loss": 0.0, + "distillation_loss": 0.22814524173736572, + "epoch": 1.52, + "learning_rate": 4.4442388023718624e-05, + "loss": 0.231, + "step": 1598, + "task_loss": 0.25681257247924805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7467960621712019, + "compression_loss": 0.0, + "distillation_loss": 0.2185569703578949, + "epoch": 1.52, + "learning_rate": 4.443568793224415e-05, + "loss": 0.221, + "step": 1599, + "task_loss": 0.24304306507110596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7468194416660316, + "compression_loss": 0.0, + "distillation_loss": 0.1327572613954544, + "epoch": 1.52, + "learning_rate": 4.4428984310183364e-05, + "loss": 0.1352, + "step": 1600, + "task_loss": 0.15751878917217255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7468428143107311, + "compression_loss": 0.0, + "distillation_loss": 0.2773556709289551, + "epoch": 1.52, + "learning_rate": 4.4422277158754005e-05, + "loss": 0.2637, + "step": 1601, + "task_loss": 0.14039871096611023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.746866180106304, + "compression_loss": 0.0, + "distillation_loss": 0.2993885278701782, + "epoch": 1.52, + "learning_rate": 4.441556647917446e-05, + "loss": 0.2958, + "step": 1602, + "task_loss": 0.26360023021698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.746889539053754, + "compression_loss": 0.0, + "distillation_loss": 0.14780689775943756, + "epoch": 1.52, + "learning_rate": 4.440885227266376e-05, + "loss": 0.1404, + "step": 1603, + "task_loss": 0.07409326732158661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7469128911540847, + "compression_loss": 0.0, + "distillation_loss": 0.1367529034614563, + "epoch": 1.52, + "learning_rate": 4.440213454044158e-05, + "loss": 0.1366, + "step": 1604, + "task_loss": 0.13533815741539001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7469362364082999, + "compression_loss": 0.0, + "distillation_loss": 0.11874578893184662, + "epoch": 1.52, + "learning_rate": 4.43954132837282e-05, + "loss": 0.12, + "step": 1605, + "task_loss": 0.1314026564359665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7469595748174032, + "compression_loss": 0.0, + "distillation_loss": 0.09664209187030792, + "epoch": 1.53, + "learning_rate": 4.43886885037446e-05, + "loss": 0.0971, + "step": 1606, + "task_loss": 0.10162815451622009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7469829063823983, + "compression_loss": 0.0, + "distillation_loss": 0.08660341799259186, + "epoch": 1.53, + "learning_rate": 4.438196020171235e-05, + "loss": 0.0801, + "step": 1607, + "task_loss": 0.021267052739858627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7470062311042888, + "compression_loss": 0.0, + "distillation_loss": 0.08157442510128021, + "epoch": 1.53, + "learning_rate": 4.437522837885369e-05, + "loss": 0.0841, + "step": 1608, + "task_loss": 0.10718289017677307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7470295489840786, + "compression_loss": 0.0, + "distillation_loss": 0.13874168694019318, + "epoch": 1.53, + "learning_rate": 4.436849303639148e-05, + "loss": 0.1323, + "step": 1609, + "task_loss": 0.07445013523101807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7470528600227712, + "compression_loss": 0.0, + "distillation_loss": 0.18086206912994385, + "epoch": 1.53, + "learning_rate": 4.436175417554923e-05, + "loss": 0.1798, + "step": 1610, + "task_loss": 0.16998088359832764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7470761642213704, + "compression_loss": 0.0, + "distillation_loss": 0.11296135932207108, + "epoch": 1.53, + "learning_rate": 4.4355011797551086e-05, + "loss": 0.1093, + "step": 1611, + "task_loss": 0.07632420212030411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7470994615808797, + "compression_loss": 0.0, + "distillation_loss": 0.10066385567188263, + "epoch": 1.53, + "learning_rate": 4.4348265903621844e-05, + "loss": 0.0935, + "step": 1612, + "task_loss": 0.028924619778990746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.747122752102303, + "compression_loss": 0.0, + "distillation_loss": 0.0938473492860794, + "epoch": 1.53, + "learning_rate": 4.4341516494986904e-05, + "loss": 0.107, + "step": 1613, + "task_loss": 0.22503921389579773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7471460357866438, + "compression_loss": 0.0, + "distillation_loss": 0.06829311698675156, + "epoch": 1.53, + "learning_rate": 4.433476357287235e-05, + "loss": 0.064, + "step": 1614, + "task_loss": 0.025221938267350197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7471693126349059, + "compression_loss": 0.0, + "distillation_loss": 0.29679104685783386, + "epoch": 1.53, + "learning_rate": 4.432800713850488e-05, + "loss": 0.3026, + "step": 1615, + "task_loss": 0.3548710346221924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.747192582648093, + "compression_loss": 0.0, + "distillation_loss": 0.05282590910792351, + "epoch": 1.53, + "learning_rate": 4.432124719311182e-05, + "loss": 0.0581, + "step": 1616, + "task_loss": 0.10553567111492157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7472158458272087, + "compression_loss": 0.0, + "distillation_loss": 0.07523874193429947, + "epoch": 1.54, + "learning_rate": 4.431448373792116e-05, + "loss": 0.0703, + "step": 1617, + "task_loss": 0.025965360924601555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7472391021732567, + "compression_loss": 0.0, + "distillation_loss": 0.24489791691303253, + "epoch": 1.54, + "learning_rate": 4.430771677416151e-05, + "loss": 0.245, + "step": 1618, + "task_loss": 0.24620510637760162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7472623516872408, + "compression_loss": 0.0, + "distillation_loss": 0.0306411050260067, + "epoch": 1.54, + "learning_rate": 4.430094630306212e-05, + "loss": 0.0361, + "step": 1619, + "task_loss": 0.08524684607982635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7472855943701644, + "compression_loss": 0.0, + "distillation_loss": 0.1247522383928299, + "epoch": 1.54, + "learning_rate": 4.429417232585288e-05, + "loss": 0.1176, + "step": 1620, + "task_loss": 0.05337625741958618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7473088302230314, + "compression_loss": 0.0, + "distillation_loss": 0.17402300238609314, + "epoch": 1.54, + "learning_rate": 4.428739484376431e-05, + "loss": 0.1722, + "step": 1621, + "task_loss": 0.1557716578245163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7473320592468454, + "compression_loss": 0.0, + "distillation_loss": 0.08097216486930847, + "epoch": 1.54, + "learning_rate": 4.4280613858027584e-05, + "loss": 0.0769, + "step": 1622, + "task_loss": 0.039864055812358856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7473552814426102, + "compression_loss": 0.0, + "distillation_loss": 0.2641907334327698, + "epoch": 1.54, + "learning_rate": 4.427382936987449e-05, + "loss": 0.2684, + "step": 1623, + "task_loss": 0.30657488107681274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7473784968113295, + "compression_loss": 0.0, + "distillation_loss": 0.30244994163513184, + "epoch": 1.54, + "learning_rate": 4.426704138053747e-05, + "loss": 0.2928, + "step": 1624, + "task_loss": 0.20630168914794922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7474017053540067, + "compression_loss": 0.0, + "distillation_loss": 0.08166351914405823, + "epoch": 1.54, + "learning_rate": 4.426024989124959e-05, + "loss": 0.0787, + "step": 1625, + "task_loss": 0.05223226174712181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7474249070716458, + "compression_loss": 0.0, + "distillation_loss": 0.044893529266119, + "epoch": 1.54, + "learning_rate": 4.425345490324456e-05, + "loss": 0.052, + "step": 1626, + "task_loss": 0.11637747287750244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7474481019652502, + "compression_loss": 0.0, + "distillation_loss": 0.17937017977237701, + "epoch": 1.55, + "learning_rate": 4.424665641775673e-05, + "loss": 0.1818, + "step": 1627, + "task_loss": 0.20323118567466736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7474712900358239, + "compression_loss": 0.0, + "distillation_loss": 0.24269092082977295, + "epoch": 1.55, + "learning_rate": 4.4239854436021056e-05, + "loss": 0.2425, + "step": 1628, + "task_loss": 0.24110905826091766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7474944712843703, + "compression_loss": 0.0, + "distillation_loss": 0.13650964200496674, + "epoch": 1.55, + "learning_rate": 4.423304895927317e-05, + "loss": 0.1435, + "step": 1629, + "task_loss": 0.20650048553943634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7475176457118933, + "compression_loss": 0.0, + "distillation_loss": 0.15174457430839539, + "epoch": 1.55, + "learning_rate": 4.4226239988749305e-05, + "loss": 0.1528, + "step": 1630, + "task_loss": 0.16226869821548462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7475408133193964, + "compression_loss": 0.0, + "distillation_loss": 0.17601847648620605, + "epoch": 1.55, + "learning_rate": 4.4219427525686366e-05, + "loss": 0.1722, + "step": 1631, + "task_loss": 0.13755454123020172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7475639741078833, + "compression_loss": 0.0, + "distillation_loss": 0.09047359228134155, + "epoch": 1.55, + "learning_rate": 4.421261157132185e-05, + "loss": 0.0868, + "step": 1632, + "task_loss": 0.053494758903980255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7475871280783579, + "compression_loss": 0.0, + "distillation_loss": 0.25244152545928955, + "epoch": 1.55, + "learning_rate": 4.4205792126893905e-05, + "loss": 0.2515, + "step": 1633, + "task_loss": 0.24255844950675964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7476102752318236, + "compression_loss": 0.0, + "distillation_loss": 0.029148060828447342, + "epoch": 1.55, + "learning_rate": 4.4198969193641324e-05, + "loss": 0.0272, + "step": 1634, + "task_loss": 0.009713640436530113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7476334155692843, + "compression_loss": 0.0, + "distillation_loss": 0.1328439861536026, + "epoch": 1.55, + "learning_rate": 4.4192142772803535e-05, + "loss": 0.1342, + "step": 1635, + "task_loss": 0.14683926105499268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7476565490917435, + "compression_loss": 0.0, + "distillation_loss": 0.20217649638652802, + "epoch": 1.55, + "learning_rate": 4.4185312865620575e-05, + "loss": 0.192, + "step": 1636, + "task_loss": 0.1007172167301178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.747679675800205, + "compression_loss": 0.0, + "distillation_loss": 0.25275999307632446, + "epoch": 1.55, + "learning_rate": 4.417847947333314e-05, + "loss": 0.2413, + "step": 1637, + "task_loss": 0.1383301168680191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7477027956956724, + "compression_loss": 0.0, + "distillation_loss": 0.13600948452949524, + "epoch": 1.56, + "learning_rate": 4.417164259718254e-05, + "loss": 0.1313, + "step": 1638, + "task_loss": 0.08905670791864395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7477259087791495, + "compression_loss": 0.0, + "distillation_loss": 0.24503761529922485, + "epoch": 1.56, + "learning_rate": 4.416480223841073e-05, + "loss": 0.2425, + "step": 1639, + "task_loss": 0.21960334479808807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7477490150516399, + "compression_loss": 0.0, + "distillation_loss": 0.14646950364112854, + "epoch": 1.56, + "learning_rate": 4.4157958398260294e-05, + "loss": 0.147, + "step": 1640, + "task_loss": 0.1517796665430069 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7477721145141473, + "compression_loss": 0.0, + "distillation_loss": 0.04808887839317322, + "epoch": 1.56, + "learning_rate": 4.415111107797445e-05, + "loss": 0.0487, + "step": 1641, + "task_loss": 0.05420362949371338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7477952071676754, + "compression_loss": 0.0, + "distillation_loss": 0.2355082482099533, + "epoch": 1.56, + "learning_rate": 4.414426027879705e-05, + "loss": 0.2322, + "step": 1642, + "task_loss": 0.20212513208389282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7478182930132279, + "compression_loss": 0.0, + "distillation_loss": 0.17253591120243073, + "epoch": 1.56, + "learning_rate": 4.413740600197257e-05, + "loss": 0.1679, + "step": 1643, + "task_loss": 0.12654927372932434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7478413720518083, + "compression_loss": 0.0, + "distillation_loss": 0.30063819885253906, + "epoch": 1.56, + "learning_rate": 4.413054824874612e-05, + "loss": 0.2869, + "step": 1644, + "task_loss": 0.16371384263038635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7478644442844206, + "compression_loss": 0.0, + "distillation_loss": 0.16720086336135864, + "epoch": 1.56, + "learning_rate": 4.412368702036345e-05, + "loss": 0.1603, + "step": 1645, + "task_loss": 0.09819523990154266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7478875097120682, + "compression_loss": 0.0, + "distillation_loss": 0.12360204756259918, + "epoch": 1.56, + "learning_rate": 4.4116822318070925e-05, + "loss": 0.116, + "step": 1646, + "task_loss": 0.047833651304244995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.747910568335755, + "compression_loss": 0.0, + "distillation_loss": 0.08152472972869873, + "epoch": 1.56, + "learning_rate": 4.4109954143115565e-05, + "loss": 0.0755, + "step": 1647, + "task_loss": 0.0212980005890131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7479336201564845, + "compression_loss": 0.0, + "distillation_loss": 0.11956100910902023, + "epoch": 1.57, + "learning_rate": 4.4103082496745e-05, + "loss": 0.1177, + "step": 1648, + "task_loss": 0.10143022984266281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7479566651752604, + "compression_loss": 0.0, + "distillation_loss": 0.13289915025234222, + "epoch": 1.57, + "learning_rate": 4.40962073802075e-05, + "loss": 0.1218, + "step": 1649, + "task_loss": 0.02191145159304142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7479797033930865, + "compression_loss": 0.0, + "distillation_loss": 0.31535133719444275, + "epoch": 1.57, + "learning_rate": 4.4089328794751954e-05, + "loss": 0.3183, + "step": 1650, + "task_loss": 0.3446260094642639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7480027348109664, + "compression_loss": 0.0, + "distillation_loss": 0.04977913200855255, + "epoch": 1.57, + "learning_rate": 4.4082446741627906e-05, + "loss": 0.0466, + "step": 1651, + "task_loss": 0.018024973571300507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7480257594299038, + "compression_loss": 0.0, + "distillation_loss": 0.17800500988960266, + "epoch": 1.57, + "learning_rate": 4.40755612220855e-05, + "loss": 0.1833, + "step": 1652, + "task_loss": 0.23083500564098358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7480487772509024, + "compression_loss": 0.0, + "distillation_loss": 0.07639691978693008, + "epoch": 1.57, + "learning_rate": 4.406867223737553e-05, + "loss": 0.085, + "step": 1653, + "task_loss": 0.16263903677463531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7480717882749659, + "compression_loss": 0.0, + "distillation_loss": 0.14029891788959503, + "epoch": 1.57, + "learning_rate": 4.406177978874941e-05, + "loss": 0.1392, + "step": 1654, + "task_loss": 0.12951192259788513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7480947925030978, + "compression_loss": 0.0, + "distillation_loss": 0.16938380897045135, + "epoch": 1.57, + "learning_rate": 4.405488387745919e-05, + "loss": 0.1714, + "step": 1655, + "task_loss": 0.19002079963684082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7481177899363021, + "compression_loss": 0.0, + "distillation_loss": 0.1673574298620224, + "epoch": 1.57, + "learning_rate": 4.4047984504757544e-05, + "loss": 0.1728, + "step": 1656, + "task_loss": 0.2220241129398346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7481407805755822, + "compression_loss": 0.0, + "distillation_loss": 0.1588287055492401, + "epoch": 1.57, + "learning_rate": 4.4041081671897775e-05, + "loss": 0.15, + "step": 1657, + "task_loss": 0.07006968557834625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.748163764421942, + "compression_loss": 0.0, + "distillation_loss": 0.20809435844421387, + "epoch": 1.57, + "learning_rate": 4.403417538013382e-05, + "loss": 0.2015, + "step": 1658, + "task_loss": 0.1424122452735901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.748186741476385, + "compression_loss": 0.0, + "distillation_loss": 0.24815024435520172, + "epoch": 1.58, + "learning_rate": 4.402726563072024e-05, + "loss": 0.2647, + "step": 1659, + "task_loss": 0.4135337769985199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.748209711739915, + "compression_loss": 0.0, + "distillation_loss": 0.10159791260957718, + "epoch": 1.58, + "learning_rate": 4.4020352424912226e-05, + "loss": 0.1108, + "step": 1660, + "task_loss": 0.193673774600029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7482326752135356, + "compression_loss": 0.0, + "distillation_loss": 0.16284605860710144, + "epoch": 1.58, + "learning_rate": 4.401343576396558e-05, + "loss": 0.1678, + "step": 1661, + "task_loss": 0.2123304009437561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7482556318982505, + "compression_loss": 0.0, + "distillation_loss": 0.06474165618419647, + "epoch": 1.58, + "learning_rate": 4.400651564913676e-05, + "loss": 0.0615, + "step": 1662, + "task_loss": 0.03229294717311859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7482785817950635, + "compression_loss": 0.0, + "distillation_loss": 0.18344372510910034, + "epoch": 1.58, + "learning_rate": 4.399959208168284e-05, + "loss": 0.1741, + "step": 1663, + "task_loss": 0.09026458114385605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7483015249049781, + "compression_loss": 0.0, + "distillation_loss": 0.19920691847801208, + "epoch": 1.58, + "learning_rate": 4.3992665062861514e-05, + "loss": 0.1956, + "step": 1664, + "task_loss": 0.16354140639305115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7483244612289981, + "compression_loss": 0.0, + "distillation_loss": 0.09947105497121811, + "epoch": 1.58, + "learning_rate": 4.398573459393111e-05, + "loss": 0.0958, + "step": 1665, + "task_loss": 0.06266459077596664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7483473907681272, + "compression_loss": 0.0, + "distillation_loss": 0.12703344225883484, + "epoch": 1.58, + "learning_rate": 4.3978800676150575e-05, + "loss": 0.1194, + "step": 1666, + "task_loss": 0.050371818244457245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.748370313523369, + "compression_loss": 0.0, + "distillation_loss": 0.546389639377594, + "epoch": 1.58, + "learning_rate": 4.39718633107795e-05, + "loss": 0.5258, + "step": 1667, + "task_loss": 0.3403877019882202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7483932294957272, + "compression_loss": 0.0, + "distillation_loss": 0.0885571613907814, + "epoch": 1.58, + "learning_rate": 4.3964922499078084e-05, + "loss": 0.0837, + "step": 1668, + "task_loss": 0.03993313014507294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7484161386862055, + "compression_loss": 0.0, + "distillation_loss": 0.1787647306919098, + "epoch": 1.58, + "learning_rate": 4.3957978242307166e-05, + "loss": 0.175, + "step": 1669, + "task_loss": 0.14156457781791687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7484390410958076, + "compression_loss": 0.0, + "distillation_loss": 0.11095503717660904, + "epoch": 1.59, + "learning_rate": 4.395103054172819e-05, + "loss": 0.1135, + "step": 1670, + "task_loss": 0.13681727647781372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7484619367255372, + "compression_loss": 0.0, + "distillation_loss": 0.09957759082317352, + "epoch": 1.59, + "learning_rate": 4.394407939860325e-05, + "loss": 0.1075, + "step": 1671, + "task_loss": 0.17913945019245148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7484848255763978, + "compression_loss": 0.0, + "distillation_loss": 0.14932695031166077, + "epoch": 1.59, + "learning_rate": 4.3937124814195054e-05, + "loss": 0.1552, + "step": 1672, + "task_loss": 0.20759890973567963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7485077076493933, + "compression_loss": 0.0, + "distillation_loss": 0.11619941145181656, + "epoch": 1.59, + "learning_rate": 4.393016678976692e-05, + "loss": 0.1115, + "step": 1673, + "task_loss": 0.06885246187448502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7485305829455273, + "compression_loss": 0.0, + "distillation_loss": 0.12013675272464752, + "epoch": 1.59, + "learning_rate": 4.3923205326582837e-05, + "loss": 0.1239, + "step": 1674, + "task_loss": 0.15799427032470703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7485534514658035, + "compression_loss": 0.0, + "distillation_loss": 0.08647421002388, + "epoch": 1.59, + "learning_rate": 4.3916240425907364e-05, + "loss": 0.0989, + "step": 1675, + "task_loss": 0.2103899121284485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7485763132112256, + "compression_loss": 0.0, + "distillation_loss": 0.25060856342315674, + "epoch": 1.59, + "learning_rate": 4.3909272089005714e-05, + "loss": 0.2436, + "step": 1676, + "task_loss": 0.18063423037528992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7485991681827973, + "compression_loss": 0.0, + "distillation_loss": 0.23684711754322052, + "epoch": 1.59, + "learning_rate": 4.3902300317143726e-05, + "loss": 0.2305, + "step": 1677, + "task_loss": 0.17349837720394135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7486220163815221, + "compression_loss": 0.0, + "distillation_loss": 0.2393345683813095, + "epoch": 1.59, + "learning_rate": 4.389532511158785e-05, + "loss": 0.2251, + "step": 1678, + "task_loss": 0.09700442850589752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.748644857808404, + "compression_loss": 0.0, + "distillation_loss": 0.1394980251789093, + "epoch": 1.59, + "learning_rate": 4.388834647360516e-05, + "loss": 0.1531, + "step": 1679, + "task_loss": 0.2758055329322815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7486676924644463, + "compression_loss": 0.0, + "distillation_loss": 0.08193551748991013, + "epoch": 1.6, + "learning_rate": 4.388136440446337e-05, + "loss": 0.0893, + "step": 1680, + "task_loss": 0.15590821206569672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.748690520350653, + "compression_loss": 0.0, + "distillation_loss": 0.032735083252191544, + "epoch": 1.6, + "learning_rate": 4.387437890543081e-05, + "loss": 0.0302, + "step": 1681, + "task_loss": 0.007712380960583687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7487133414680276, + "compression_loss": 0.0, + "distillation_loss": 0.2094217985868454, + "epoch": 1.6, + "learning_rate": 4.3867389977776416e-05, + "loss": 0.2026, + "step": 1682, + "task_loss": 0.14127467572689056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7487361558175739, + "compression_loss": 0.0, + "distillation_loss": 0.14119529724121094, + "epoch": 1.6, + "learning_rate": 4.3860397622769756e-05, + "loss": 0.1352, + "step": 1683, + "task_loss": 0.08111706376075745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7487589634002955, + "compression_loss": 0.0, + "distillation_loss": 0.2153528779745102, + "epoch": 1.6, + "learning_rate": 4.3853401841681046e-05, + "loss": 0.2241, + "step": 1684, + "task_loss": 0.3023759126663208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7487817642171961, + "compression_loss": 0.0, + "distillation_loss": 0.050505850464105606, + "epoch": 1.6, + "learning_rate": 4.3846402635781093e-05, + "loss": 0.0562, + "step": 1685, + "task_loss": 0.1069660410284996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7488045582692795, + "compression_loss": 0.0, + "distillation_loss": 0.1630827784538269, + "epoch": 1.6, + "learning_rate": 4.3839400006341335e-05, + "loss": 0.1598, + "step": 1686, + "task_loss": 0.1302591860294342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7488273455575492, + "compression_loss": 0.0, + "distillation_loss": 0.03398447483778, + "epoch": 1.6, + "learning_rate": 4.383239395463383e-05, + "loss": 0.0397, + "step": 1687, + "task_loss": 0.090923011302948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7488501260830089, + "compression_loss": 0.0, + "distillation_loss": 0.06578654050827026, + "epoch": 1.6, + "learning_rate": 4.382538448193127e-05, + "loss": 0.0629, + "step": 1688, + "task_loss": 0.037396807223558426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7488728998466624, + "compression_loss": 0.0, + "distillation_loss": 0.11456318199634552, + "epoch": 1.6, + "learning_rate": 4.381837158950695e-05, + "loss": 0.1149, + "step": 1689, + "task_loss": 0.11771449446678162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7488956668495133, + "compression_loss": 0.0, + "distillation_loss": 0.09141987562179565, + "epoch": 1.6, + "learning_rate": 4.3811355278634804e-05, + "loss": 0.0861, + "step": 1690, + "task_loss": 0.03799279406666756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7489184270925653, + "compression_loss": 0.0, + "distillation_loss": 0.07688391953706741, + "epoch": 1.61, + "learning_rate": 4.380433555058937e-05, + "loss": 0.0735, + "step": 1691, + "task_loss": 0.04309658333659172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7489411805768221, + "compression_loss": 0.0, + "distillation_loss": 0.24183808267116547, + "epoch": 1.61, + "learning_rate": 4.379731240664583e-05, + "loss": 0.2428, + "step": 1692, + "task_loss": 0.251809298992157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7489639273032873, + "compression_loss": 0.0, + "distillation_loss": 0.04763605445623398, + "epoch": 1.61, + "learning_rate": 4.379028584807996e-05, + "loss": 0.0519, + "step": 1693, + "task_loss": 0.09006030112504959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7489866672729648, + "compression_loss": 0.0, + "distillation_loss": 0.26702186465263367, + "epoch": 1.61, + "learning_rate": 4.3783255876168165e-05, + "loss": 0.2575, + "step": 1694, + "task_loss": 0.17218661308288574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.749009400486858, + "compression_loss": 0.0, + "distillation_loss": 0.1826254427433014, + "epoch": 1.61, + "learning_rate": 4.377622249218748e-05, + "loss": 0.1761, + "step": 1695, + "task_loss": 0.11716414242982864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7490321269459708, + "compression_loss": 0.0, + "distillation_loss": 0.06596788763999939, + "epoch": 1.61, + "learning_rate": 4.376918569741556e-05, + "loss": 0.078, + "step": 1696, + "task_loss": 0.18661688268184662 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7490548466513067, + "compression_loss": 0.0, + "distillation_loss": 0.07024285942316055, + "epoch": 1.61, + "learning_rate": 4.376214549313066e-05, + "loss": 0.0653, + "step": 1697, + "task_loss": 0.020744740962982178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7490775596038696, + "compression_loss": 0.0, + "distillation_loss": 0.13795125484466553, + "epoch": 1.61, + "learning_rate": 4.375510188061167e-05, + "loss": 0.1382, + "step": 1698, + "task_loss": 0.14067493379116058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.749100265804663, + "compression_loss": 0.0, + "distillation_loss": 0.06560136377811432, + "epoch": 1.61, + "learning_rate": 4.37480548611381e-05, + "loss": 0.0745, + "step": 1699, + "task_loss": 0.15437015891075134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7491229652546907, + "compression_loss": 0.0, + "distillation_loss": 0.10080569982528687, + "epoch": 1.61, + "learning_rate": 4.374100443599007e-05, + "loss": 0.0956, + "step": 1700, + "task_loss": 0.04915327578783035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7491456579549562, + "compression_loss": 0.0, + "distillation_loss": 0.151437908411026, + "epoch": 1.62, + "learning_rate": 4.3733950606448324e-05, + "loss": 0.1444, + "step": 1701, + "task_loss": 0.08063948154449463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7491683439064634, + "compression_loss": 0.0, + "distillation_loss": 0.14637860655784607, + "epoch": 1.62, + "learning_rate": 4.3726893373794234e-05, + "loss": 0.1415, + "step": 1702, + "task_loss": 0.09764857590198517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7491910231102158, + "compression_loss": 0.0, + "distillation_loss": 0.18097403645515442, + "epoch": 1.62, + "learning_rate": 4.3719832739309766e-05, + "loss": 0.1771, + "step": 1703, + "task_loss": 0.1424616128206253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7492136955672173, + "compression_loss": 0.0, + "distillation_loss": 0.22387400269508362, + "epoch": 1.62, + "learning_rate": 4.371276870427753e-05, + "loss": 0.2137, + "step": 1704, + "task_loss": 0.12188294529914856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7492363612784714, + "compression_loss": 0.0, + "distillation_loss": 0.09288572520017624, + "epoch": 1.62, + "learning_rate": 4.3705701269980734e-05, + "loss": 0.0888, + "step": 1705, + "task_loss": 0.05178219825029373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7492590202449818, + "compression_loss": 0.0, + "distillation_loss": 0.05206472426652908, + "epoch": 1.62, + "learning_rate": 4.369863043770322e-05, + "loss": 0.048, + "step": 1706, + "task_loss": 0.01116347685456276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7492816724677522, + "compression_loss": 0.0, + "distillation_loss": 0.11153702437877655, + "epoch": 1.62, + "learning_rate": 4.369155620872943e-05, + "loss": 0.1047, + "step": 1707, + "task_loss": 0.04338126629590988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7493043179477863, + "compression_loss": 0.0, + "distillation_loss": 0.15479451417922974, + "epoch": 1.62, + "learning_rate": 4.3684478584344433e-05, + "loss": 0.1515, + "step": 1708, + "task_loss": 0.12206155061721802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7493269566860878, + "compression_loss": 0.0, + "distillation_loss": 0.05233832448720932, + "epoch": 1.62, + "learning_rate": 4.367739756583392e-05, + "loss": 0.0498, + "step": 1709, + "task_loss": 0.026678021997213364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7493495886836604, + "compression_loss": 0.0, + "distillation_loss": 0.05933618173003197, + "epoch": 1.62, + "learning_rate": 4.367031315448419e-05, + "loss": 0.0733, + "step": 1710, + "task_loss": 0.19880297780036926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7493722139415077, + "compression_loss": 0.0, + "distillation_loss": 0.1785992830991745, + "epoch": 1.62, + "learning_rate": 4.366322535158215e-05, + "loss": 0.1674, + "step": 1711, + "task_loss": 0.06677613407373428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7493948324606334, + "compression_loss": 0.0, + "distillation_loss": 0.05169357359409332, + "epoch": 1.63, + "learning_rate": 4.3656134158415344e-05, + "loss": 0.0492, + "step": 1712, + "task_loss": 0.026786495000123978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7494174442420414, + "compression_loss": 0.0, + "distillation_loss": 0.09955106675624847, + "epoch": 1.63, + "learning_rate": 4.364903957627192e-05, + "loss": 0.0959, + "step": 1713, + "task_loss": 0.06332787871360779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.749440049286735, + "compression_loss": 0.0, + "distillation_loss": 0.20070070028305054, + "epoch": 1.63, + "learning_rate": 4.3641941606440644e-05, + "loss": 0.1948, + "step": 1714, + "task_loss": 0.14132705330848694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7494626475957181, + "compression_loss": 0.0, + "distillation_loss": 0.07433759421110153, + "epoch": 1.63, + "learning_rate": 4.36348402502109e-05, + "loss": 0.0819, + "step": 1715, + "task_loss": 0.15012231469154358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7494852391699944, + "compression_loss": 0.0, + "distillation_loss": 0.13596729934215546, + "epoch": 1.63, + "learning_rate": 4.3627735508872666e-05, + "loss": 0.1536, + "step": 1716, + "task_loss": 0.31197068095207214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7495078240105675, + "compression_loss": 0.0, + "distillation_loss": 0.2269076108932495, + "epoch": 1.63, + "learning_rate": 4.362062738371657e-05, + "loss": 0.2384, + "step": 1717, + "task_loss": 0.3419533371925354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7495304021184411, + "compression_loss": 0.0, + "distillation_loss": 0.20675890147686005, + "epoch": 1.63, + "learning_rate": 4.361351587603384e-05, + "loss": 0.2075, + "step": 1718, + "task_loss": 0.21458357572555542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7495529734946189, + "compression_loss": 0.0, + "distillation_loss": 0.10246886312961578, + "epoch": 1.63, + "learning_rate": 4.360640098711629e-05, + "loss": 0.0964, + "step": 1719, + "task_loss": 0.04224860668182373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7495755381401047, + "compression_loss": 0.0, + "distillation_loss": 0.250265896320343, + "epoch": 1.63, + "learning_rate": 4.3599282718256406e-05, + "loss": 0.2593, + "step": 1720, + "task_loss": 0.34033694863319397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.749598096055902, + "compression_loss": 0.0, + "distillation_loss": 0.11491803824901581, + "epoch": 1.63, + "learning_rate": 4.3592161070747233e-05, + "loss": 0.1297, + "step": 1721, + "task_loss": 0.2628564238548279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7496206472430145, + "compression_loss": 0.0, + "distillation_loss": 0.05386704206466675, + "epoch": 1.64, + "learning_rate": 4.358503604588247e-05, + "loss": 0.0536, + "step": 1722, + "task_loss": 0.050750650465488434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.749643191702446, + "compression_loss": 0.0, + "distillation_loss": 0.35980457067489624, + "epoch": 1.64, + "learning_rate": 4.357790764495639e-05, + "loss": 0.3559, + "step": 1723, + "task_loss": 0.3210410475730896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7496657294352, + "compression_loss": 0.0, + "distillation_loss": 0.08513958752155304, + "epoch": 1.64, + "learning_rate": 4.357077586926392e-05, + "loss": 0.0913, + "step": 1724, + "task_loss": 0.14632166922092438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7496882604422804, + "compression_loss": 0.0, + "distillation_loss": 0.16130788624286652, + "epoch": 1.64, + "learning_rate": 4.356364072010059e-05, + "loss": 0.1687, + "step": 1725, + "task_loss": 0.23540650308132172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7497107847246908, + "compression_loss": 0.0, + "distillation_loss": 0.10402365028858185, + "epoch": 1.64, + "learning_rate": 4.3556502198762496e-05, + "loss": 0.1023, + "step": 1726, + "task_loss": 0.08679551631212234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7497333022834348, + "compression_loss": 0.0, + "distillation_loss": 0.07432233542203903, + "epoch": 1.64, + "learning_rate": 4.354936030654642e-05, + "loss": 0.0807, + "step": 1727, + "task_loss": 0.13773983716964722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7497558131195161, + "compression_loss": 0.0, + "distillation_loss": 0.2607779800891876, + "epoch": 1.64, + "learning_rate": 4.3542215044749705e-05, + "loss": 0.2518, + "step": 1728, + "task_loss": 0.1711687445640564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7497783172339385, + "compression_loss": 0.0, + "distillation_loss": 0.059374839067459106, + "epoch": 1.64, + "learning_rate": 4.3535066414670336e-05, + "loss": 0.0628, + "step": 1729, + "task_loss": 0.09339209645986557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7498008146277056, + "compression_loss": 0.0, + "distillation_loss": 0.20149439573287964, + "epoch": 1.64, + "learning_rate": 4.352791441760687e-05, + "loss": 0.1973, + "step": 1730, + "task_loss": 0.15943476557731628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7498233053018211, + "compression_loss": 0.0, + "distillation_loss": 0.13185396790504456, + "epoch": 1.64, + "learning_rate": 4.352075905485854e-05, + "loss": 0.1234, + "step": 1731, + "task_loss": 0.04712344706058502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7498457892572886, + "compression_loss": 0.0, + "distillation_loss": 0.1501794159412384, + "epoch": 1.64, + "learning_rate": 4.351360032772512e-05, + "loss": 0.1726, + "step": 1732, + "task_loss": 0.3744635581970215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.749868266495112, + "compression_loss": 0.0, + "distillation_loss": 0.07114310562610626, + "epoch": 1.65, + "learning_rate": 4.3506438237507033e-05, + "loss": 0.0689, + "step": 1733, + "task_loss": 0.048517607152462006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7498907370162946, + "compression_loss": 0.0, + "distillation_loss": 0.13893577456474304, + "epoch": 1.65, + "learning_rate": 4.3499272785505316e-05, + "loss": 0.159, + "step": 1734, + "task_loss": 0.33932289481163025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7499132008218404, + "compression_loss": 0.0, + "distillation_loss": 0.14102880656719208, + "epoch": 1.65, + "learning_rate": 4.349210397302161e-05, + "loss": 0.1395, + "step": 1735, + "task_loss": 0.12529143691062927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7499356579127531, + "compression_loss": 0.0, + "distillation_loss": 0.2732577919960022, + "epoch": 1.65, + "learning_rate": 4.348493180135815e-05, + "loss": 0.2615, + "step": 1736, + "task_loss": 0.15519018471240997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7499581082900362, + "compression_loss": 0.0, + "distillation_loss": 0.061276476830244064, + "epoch": 1.65, + "learning_rate": 4.347775627181782e-05, + "loss": 0.0631, + "step": 1737, + "task_loss": 0.07924476265907288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7499805519546935, + "compression_loss": 0.0, + "distillation_loss": 0.20356178283691406, + "epoch": 1.65, + "learning_rate": 4.3470577385704056e-05, + "loss": 0.2137, + "step": 1738, + "task_loss": 0.3052142560482025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7500029889077287, + "compression_loss": 0.0, + "distillation_loss": 0.24523907899856567, + "epoch": 1.65, + "learning_rate": 4.346339514432096e-05, + "loss": 0.2379, + "step": 1739, + "task_loss": 0.17217698693275452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7500254191501452, + "compression_loss": 0.0, + "distillation_loss": 0.14814287424087524, + "epoch": 1.65, + "learning_rate": 4.345620954897322e-05, + "loss": 0.1381, + "step": 1740, + "task_loss": 0.04762396588921547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7500478426829471, + "compression_loss": 0.0, + "distillation_loss": 0.08230330049991608, + "epoch": 1.65, + "learning_rate": 4.344902060096612e-05, + "loss": 0.0829, + "step": 1741, + "task_loss": 0.08790513873100281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7500702595071379, + "compression_loss": 0.0, + "distillation_loss": 0.12028224766254425, + "epoch": 1.65, + "learning_rate": 4.344182830160558e-05, + "loss": 0.1143, + "step": 1742, + "task_loss": 0.060103029012680054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7500926696237211, + "compression_loss": 0.0, + "distillation_loss": 0.12078093737363815, + "epoch": 1.66, + "learning_rate": 4.343463265219811e-05, + "loss": 0.115, + "step": 1743, + "task_loss": 0.06334509700536728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7501150730337007, + "compression_loss": 0.0, + "distillation_loss": 0.10595209896564484, + "epoch": 1.66, + "learning_rate": 4.342743365405084e-05, + "loss": 0.1103, + "step": 1744, + "task_loss": 0.14954574406147003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7501374697380803, + "compression_loss": 0.0, + "distillation_loss": 0.16210979223251343, + "epoch": 1.66, + "learning_rate": 4.3420231308471496e-05, + "loss": 0.1555, + "step": 1745, + "task_loss": 0.09629593789577484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7501598597378634, + "compression_loss": 0.0, + "distillation_loss": 0.185985267162323, + "epoch": 1.66, + "learning_rate": 4.3413025616768424e-05, + "loss": 0.1858, + "step": 1746, + "task_loss": 0.1842183768749237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7501822430340538, + "compression_loss": 0.0, + "distillation_loss": 0.1382254958152771, + "epoch": 1.66, + "learning_rate": 4.340581658025058e-05, + "loss": 0.1359, + "step": 1747, + "task_loss": 0.11496403813362122 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7502046196276552, + "compression_loss": 0.0, + "distillation_loss": 0.0522196963429451, + "epoch": 1.66, + "learning_rate": 4.33986042002275e-05, + "loss": 0.058, + "step": 1748, + "task_loss": 0.10959599912166595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7502269895196713, + "compression_loss": 0.0, + "distillation_loss": 0.045316457748413086, + "epoch": 1.66, + "learning_rate": 4.339138847800936e-05, + "loss": 0.0488, + "step": 1749, + "task_loss": 0.08048881590366364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7502493527111057, + "compression_loss": 0.0, + "distillation_loss": 0.11536607146263123, + "epoch": 1.66, + "learning_rate": 4.3384169414906925e-05, + "loss": 0.1198, + "step": 1750, + "task_loss": 0.1599726378917694 + }, + { + "epoch": 1.66, + "eval_accuracy": 0.9002293577981652, + "eval_loss": 0.3980746567249298, + "eval_runtime": 18.3137, + "eval_samples_per_second": 47.615, + "eval_steps_per_second": 5.952, + "step": 1750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7502717092029622, + "compression_loss": 0.0, + "distillation_loss": 0.22586020827293396, + "epoch": 1.66, + "learning_rate": 4.3376947012231586e-05, + "loss": 0.214, + "step": 1751, + "task_loss": 0.10768207907676697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7502940589962444, + "compression_loss": 0.0, + "distillation_loss": 0.10226061195135117, + "epoch": 1.66, + "learning_rate": 4.336972127129532e-05, + "loss": 0.1036, + "step": 1752, + "task_loss": 0.11605414748191833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7503164020919558, + "compression_loss": 0.0, + "distillation_loss": 0.2922346591949463, + "epoch": 1.66, + "learning_rate": 4.3362492193410705e-05, + "loss": 0.2796, + "step": 1753, + "task_loss": 0.1663048416376114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7503387384911004, + "compression_loss": 0.0, + "distillation_loss": 0.05565609037876129, + "epoch": 1.67, + "learning_rate": 4.335525977989095e-05, + "loss": 0.0522, + "step": 1754, + "task_loss": 0.020734498277306557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7503610681946818, + "compression_loss": 0.0, + "distillation_loss": 0.19371697306632996, + "epoch": 1.67, + "learning_rate": 4.334802403204986e-05, + "loss": 0.1989, + "step": 1755, + "task_loss": 0.2459201216697693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7503833912037037, + "compression_loss": 0.0, + "distillation_loss": 0.12561503052711487, + "epoch": 1.67, + "learning_rate": 4.334078495120184e-05, + "loss": 0.1226, + "step": 1756, + "task_loss": 0.0955725908279419 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7504057075191697, + "compression_loss": 0.0, + "distillation_loss": 0.25841909646987915, + "epoch": 1.67, + "learning_rate": 4.33335425386619e-05, + "loss": 0.2526, + "step": 1757, + "task_loss": 0.2000465989112854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7504280171420834, + "compression_loss": 0.0, + "distillation_loss": 0.18024028837680817, + "epoch": 1.67, + "learning_rate": 4.332629679574566e-05, + "loss": 0.1706, + "step": 1758, + "task_loss": 0.08373329043388367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7504503200734487, + "compression_loss": 0.0, + "distillation_loss": 0.21449556946754456, + "epoch": 1.67, + "learning_rate": 4.331904772376935e-05, + "loss": 0.2073, + "step": 1759, + "task_loss": 0.14211583137512207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7504726163142691, + "compression_loss": 0.0, + "distillation_loss": 0.17756842076778412, + "epoch": 1.67, + "learning_rate": 4.3311795324049795e-05, + "loss": 0.1765, + "step": 1760, + "task_loss": 0.16666541993618011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7504949058655483, + "compression_loss": 0.0, + "distillation_loss": 0.1488828957080841, + "epoch": 1.67, + "learning_rate": 4.3304539597904435e-05, + "loss": 0.1445, + "step": 1761, + "task_loss": 0.10537352412939072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7505171887282901, + "compression_loss": 0.0, + "distillation_loss": 0.18257030844688416, + "epoch": 1.67, + "learning_rate": 4.3297280546651295e-05, + "loss": 0.185, + "step": 1762, + "task_loss": 0.20700550079345703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7505394649034981, + "compression_loss": 0.0, + "distillation_loss": 0.04670891910791397, + "epoch": 1.67, + "learning_rate": 4.329001817160903e-05, + "loss": 0.05, + "step": 1763, + "task_loss": 0.07921046018600464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.750561734392176, + "compression_loss": 0.0, + "distillation_loss": 0.15156331658363342, + "epoch": 1.68, + "learning_rate": 4.3282752474096864e-05, + "loss": 0.1615, + "step": 1764, + "task_loss": 0.2507190704345703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7505839971953274, + "compression_loss": 0.0, + "distillation_loss": 0.23599442839622498, + "epoch": 1.68, + "learning_rate": 4.327548345543467e-05, + "loss": 0.2256, + "step": 1765, + "task_loss": 0.13191649317741394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7506062533139561, + "compression_loss": 0.0, + "distillation_loss": 0.15651676058769226, + "epoch": 1.68, + "learning_rate": 4.326821111694289e-05, + "loss": 0.1509, + "step": 1766, + "task_loss": 0.10070617496967316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7506285027490659, + "compression_loss": 0.0, + "distillation_loss": 0.14649711549282074, + "epoch": 1.68, + "learning_rate": 4.3260935459942584e-05, + "loss": 0.144, + "step": 1767, + "task_loss": 0.12118849158287048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7506507455016602, + "compression_loss": 0.0, + "distillation_loss": 0.05859662592411041, + "epoch": 1.68, + "learning_rate": 4.32536564857554e-05, + "loss": 0.0547, + "step": 1768, + "task_loss": 0.020115777850151062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7506729815727428, + "compression_loss": 0.0, + "distillation_loss": 0.24223226308822632, + "epoch": 1.68, + "learning_rate": 4.3246374195703604e-05, + "loss": 0.2509, + "step": 1769, + "task_loss": 0.32900571823120117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7506952109633174, + "compression_loss": 0.0, + "distillation_loss": 0.11694598197937012, + "epoch": 1.68, + "learning_rate": 4.3239088591110065e-05, + "loss": 0.1294, + "step": 1770, + "task_loss": 0.2414626181125641 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7507174336743877, + "compression_loss": 0.0, + "distillation_loss": 0.23970989882946014, + "epoch": 1.68, + "learning_rate": 4.323179967329824e-05, + "loss": 0.2291, + "step": 1771, + "task_loss": 0.13375961780548096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7507396497069573, + "compression_loss": 0.0, + "distillation_loss": 0.050326522439718246, + "epoch": 1.68, + "learning_rate": 4.3224507443592196e-05, + "loss": 0.0593, + "step": 1772, + "task_loss": 0.13998878002166748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.75076185906203, + "compression_loss": 0.0, + "distillation_loss": 0.23953363299369812, + "epoch": 1.68, + "learning_rate": 4.321721190331661e-05, + "loss": 0.2254, + "step": 1773, + "task_loss": 0.09803837537765503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7507840617406093, + "compression_loss": 0.0, + "distillation_loss": 0.12072421610355377, + "epoch": 1.68, + "learning_rate": 4.3209913053796746e-05, + "loss": 0.1307, + "step": 1774, + "task_loss": 0.2204258143901825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7508062577436991, + "compression_loss": 0.0, + "distillation_loss": 0.2765160799026489, + "epoch": 1.69, + "learning_rate": 4.3202610896358474e-05, + "loss": 0.2627, + "step": 1775, + "task_loss": 0.13829368352890015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7508284470723029, + "compression_loss": 0.0, + "distillation_loss": 0.16436317563056946, + "epoch": 1.69, + "learning_rate": 4.319530543232827e-05, + "loss": 0.1646, + "step": 1776, + "task_loss": 0.16645075380802155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7508506297274246, + "compression_loss": 0.0, + "distillation_loss": 0.1174478605389595, + "epoch": 1.69, + "learning_rate": 4.31879966630332e-05, + "loss": 0.1172, + "step": 1777, + "task_loss": 0.11536243557929993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7508728057100676, + "compression_loss": 0.0, + "distillation_loss": 0.33932632207870483, + "epoch": 1.69, + "learning_rate": 4.318068458980095e-05, + "loss": 0.3292, + "step": 1778, + "task_loss": 0.23805338144302368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7508949750212358, + "compression_loss": 0.0, + "distillation_loss": 0.08245585113763809, + "epoch": 1.69, + "learning_rate": 4.317336921395978e-05, + "loss": 0.0889, + "step": 1779, + "task_loss": 0.14642596244812012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7509171376619328, + "compression_loss": 0.0, + "distillation_loss": 0.32606542110443115, + "epoch": 1.69, + "learning_rate": 4.316605053683856e-05, + "loss": 0.3143, + "step": 1780, + "task_loss": 0.20817196369171143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7509392936331623, + "compression_loss": 0.0, + "distillation_loss": 0.10882005095481873, + "epoch": 1.69, + "learning_rate": 4.3158728559766786e-05, + "loss": 0.1177, + "step": 1781, + "task_loss": 0.1978674679994583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7509614429359279, + "compression_loss": 0.0, + "distillation_loss": 0.0932815819978714, + "epoch": 1.69, + "learning_rate": 4.315140328407451e-05, + "loss": 0.0889, + "step": 1782, + "task_loss": 0.0497298426926136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7509835855712335, + "compression_loss": 0.0, + "distillation_loss": 0.12256644666194916, + "epoch": 1.69, + "learning_rate": 4.314407471109241e-05, + "loss": 0.1198, + "step": 1783, + "task_loss": 0.0945819541811943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7510057215400826, + "compression_loss": 0.0, + "distillation_loss": 0.09638384729623795, + "epoch": 1.69, + "learning_rate": 4.313674284215176e-05, + "loss": 0.0956, + "step": 1784, + "task_loss": 0.08847030997276306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7510278508434789, + "compression_loss": 0.0, + "distillation_loss": 0.15519554913043976, + "epoch": 1.7, + "learning_rate": 4.312940767858441e-05, + "loss": 0.1552, + "step": 1785, + "task_loss": 0.15559692680835724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7510499734824261, + "compression_loss": 0.0, + "distillation_loss": 0.09053555130958557, + "epoch": 1.7, + "learning_rate": 4.312206922172286e-05, + "loss": 0.0836, + "step": 1786, + "task_loss": 0.021591845899820328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7510720894579278, + "compression_loss": 0.0, + "distillation_loss": 0.0775122344493866, + "epoch": 1.7, + "learning_rate": 4.311472747290015e-05, + "loss": 0.0729, + "step": 1787, + "task_loss": 0.03092704340815544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7510941987709878, + "compression_loss": 0.0, + "distillation_loss": 0.20807501673698425, + "epoch": 1.7, + "learning_rate": 4.310738243344996e-05, + "loss": 0.2011, + "step": 1788, + "task_loss": 0.1382240504026413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7511163014226098, + "compression_loss": 0.0, + "distillation_loss": 0.13786278665065765, + "epoch": 1.7, + "learning_rate": 4.310003410470653e-05, + "loss": 0.137, + "step": 1789, + "task_loss": 0.12924642860889435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7511383974137974, + "compression_loss": 0.0, + "distillation_loss": 0.03874251991510391, + "epoch": 1.7, + "learning_rate": 4.309268248800476e-05, + "loss": 0.0455, + "step": 1790, + "task_loss": 0.10590145736932755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7511604867455544, + "compression_loss": 0.0, + "distillation_loss": 0.16585403680801392, + "epoch": 1.7, + "learning_rate": 4.3085327584680056e-05, + "loss": 0.1747, + "step": 1791, + "task_loss": 0.2540725767612457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7511825694188843, + "compression_loss": 0.0, + "distillation_loss": 0.17077066004276276, + "epoch": 1.7, + "learning_rate": 4.3077969396068505e-05, + "loss": 0.172, + "step": 1792, + "task_loss": 0.18328894674777985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7512046454347909, + "compression_loss": 0.0, + "distillation_loss": 0.23324266076087952, + "epoch": 1.7, + "learning_rate": 4.307060792350675e-05, + "loss": 0.2235, + "step": 1793, + "task_loss": 0.13554205000400543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7512267147942778, + "compression_loss": 0.0, + "distillation_loss": 0.06733982264995575, + "epoch": 1.7, + "learning_rate": 4.306324316833203e-05, + "loss": 0.0625, + "step": 1794, + "task_loss": 0.019262997433543205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7512487774983488, + "compression_loss": 0.0, + "distillation_loss": 0.1413702815771103, + "epoch": 1.7, + "learning_rate": 4.3055875131882204e-05, + "loss": 0.1334, + "step": 1795, + "task_loss": 0.061707641929388046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7512708335480076, + "compression_loss": 0.0, + "distillation_loss": 0.1853601038455963, + "epoch": 1.71, + "learning_rate": 4.30485038154957e-05, + "loss": 0.1838, + "step": 1796, + "task_loss": 0.16993498802185059 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7512928829442577, + "compression_loss": 0.0, + "distillation_loss": 0.15853723883628845, + "epoch": 1.71, + "learning_rate": 4.304112922051155e-05, + "loss": 0.1542, + "step": 1797, + "task_loss": 0.11558166146278381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7513149256881029, + "compression_loss": 0.0, + "distillation_loss": 0.1695374995470047, + "epoch": 1.71, + "learning_rate": 4.30337513482694e-05, + "loss": 0.1642, + "step": 1798, + "task_loss": 0.11586904525756836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7513369617805469, + "compression_loss": 0.0, + "distillation_loss": 0.05897611379623413, + "epoch": 1.71, + "learning_rate": 4.3026370200109463e-05, + "loss": 0.0712, + "step": 1799, + "task_loss": 0.18152813613414764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7513589912225933, + "compression_loss": 0.0, + "distillation_loss": 0.12656652927398682, + "epoch": 1.71, + "learning_rate": 4.301898577737255e-05, + "loss": 0.1314, + "step": 1800, + "task_loss": 0.17490315437316895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7513810140152459, + "compression_loss": 0.0, + "distillation_loss": 0.05648940056562424, + "epoch": 1.71, + "learning_rate": 4.3011598081400105e-05, + "loss": 0.0567, + "step": 1801, + "task_loss": 0.05847236514091492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7514030301595083, + "compression_loss": 0.0, + "distillation_loss": 0.20593807101249695, + "epoch": 1.71, + "learning_rate": 4.3004207113534124e-05, + "loss": 0.1972, + "step": 1802, + "task_loss": 0.1184128075838089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7514250396563842, + "compression_loss": 0.0, + "distillation_loss": 0.11276452243328094, + "epoch": 1.71, + "learning_rate": 4.2996812875117206e-05, + "loss": 0.1387, + "step": 1803, + "task_loss": 0.3719395101070404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7514470425068772, + "compression_loss": 0.0, + "distillation_loss": 0.23946917057037354, + "epoch": 1.71, + "learning_rate": 4.2989415367492556e-05, + "loss": 0.2358, + "step": 1804, + "task_loss": 0.20264212787151337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7514690387119911, + "compression_loss": 0.0, + "distillation_loss": 0.14112165570259094, + "epoch": 1.71, + "learning_rate": 4.298201459200397e-05, + "loss": 0.1457, + "step": 1805, + "task_loss": 0.1867811530828476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7514910282727296, + "compression_loss": 0.0, + "distillation_loss": 0.18797874450683594, + "epoch": 1.72, + "learning_rate": 4.2974610549995834e-05, + "loss": 0.187, + "step": 1806, + "task_loss": 0.17826765775680542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7515130111900963, + "compression_loss": 0.0, + "distillation_loss": 0.16024428606033325, + "epoch": 1.72, + "learning_rate": 4.296720324281311e-05, + "loss": 0.157, + "step": 1807, + "task_loss": 0.12820284068584442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.751534987465095, + "compression_loss": 0.0, + "distillation_loss": 0.16456547379493713, + "epoch": 1.72, + "learning_rate": 4.29597926718014e-05, + "loss": 0.1509, + "step": 1808, + "task_loss": 0.028283601626753807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.751556957098729, + "compression_loss": 0.0, + "distillation_loss": 0.085496686398983, + "epoch": 1.72, + "learning_rate": 4.295237883830685e-05, + "loss": 0.0912, + "step": 1809, + "task_loss": 0.14275582134723663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7515789200920026, + "compression_loss": 0.0, + "distillation_loss": 0.24256683886051178, + "epoch": 1.72, + "learning_rate": 4.294496174367623e-05, + "loss": 0.2418, + "step": 1810, + "task_loss": 0.23468558490276337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.751600876445919, + "compression_loss": 0.0, + "distillation_loss": 0.08293971419334412, + "epoch": 1.72, + "learning_rate": 4.2937541389256877e-05, + "loss": 0.0832, + "step": 1811, + "task_loss": 0.08516831696033478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.751622826161482, + "compression_loss": 0.0, + "distillation_loss": 0.15826871991157532, + "epoch": 1.72, + "learning_rate": 4.293011777639675e-05, + "loss": 0.1666, + "step": 1812, + "task_loss": 0.2412494570016861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7516447692396955, + "compression_loss": 0.0, + "distillation_loss": 0.16091185808181763, + "epoch": 1.72, + "learning_rate": 4.2922690906444374e-05, + "loss": 0.1583, + "step": 1813, + "task_loss": 0.13493165373802185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7516667056815628, + "compression_loss": 0.0, + "distillation_loss": 0.06874893605709076, + "epoch": 1.72, + "learning_rate": 4.291526078074888e-05, + "loss": 0.0689, + "step": 1814, + "task_loss": 0.07061054557561874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7516886354880878, + "compression_loss": 0.0, + "distillation_loss": 0.1136234849691391, + "epoch": 1.72, + "learning_rate": 4.290782740065997e-05, + "loss": 0.1169, + "step": 1815, + "task_loss": 0.14640654623508453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7517105586602743, + "compression_loss": 0.0, + "distillation_loss": 0.14775055646896362, + "epoch": 1.72, + "learning_rate": 4.290039076752799e-05, + "loss": 0.1389, + "step": 1816, + "task_loss": 0.059689588844776154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7517324751991258, + "compression_loss": 0.0, + "distillation_loss": 0.20547989010810852, + "epoch": 1.73, + "learning_rate": 4.28929508827038e-05, + "loss": 0.2006, + "step": 1817, + "task_loss": 0.15708599984645844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.751754385105646, + "compression_loss": 0.0, + "distillation_loss": 0.03952891379594803, + "epoch": 1.73, + "learning_rate": 4.288550774753892e-05, + "loss": 0.0444, + "step": 1818, + "task_loss": 0.08782260119915009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7517762883808385, + "compression_loss": 0.0, + "distillation_loss": 0.0741434320807457, + "epoch": 1.73, + "learning_rate": 4.2878061363385414e-05, + "loss": 0.0734, + "step": 1819, + "task_loss": 0.06647836416959763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7517981850257073, + "compression_loss": 0.0, + "distillation_loss": 0.08467371761798859, + "epoch": 1.73, + "learning_rate": 4.287061173159597e-05, + "loss": 0.0828, + "step": 1820, + "task_loss": 0.06570696830749512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7518200750412558, + "compression_loss": 0.0, + "distillation_loss": 0.14830920100212097, + "epoch": 1.73, + "learning_rate": 4.286315885352382e-05, + "loss": 0.1399, + "step": 1821, + "task_loss": 0.06412569433450699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7518419584284877, + "compression_loss": 0.0, + "distillation_loss": 0.13605275750160217, + "epoch": 1.73, + "learning_rate": 4.285570273052285e-05, + "loss": 0.1302, + "step": 1822, + "task_loss": 0.07789816707372665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7518638351884068, + "compression_loss": 0.0, + "distillation_loss": 0.12208747863769531, + "epoch": 1.73, + "learning_rate": 4.2848243363947484e-05, + "loss": 0.1132, + "step": 1823, + "task_loss": 0.032726749777793884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7518857053220167, + "compression_loss": 0.0, + "distillation_loss": 0.08085359632968903, + "epoch": 1.73, + "learning_rate": 4.2840780755152746e-05, + "loss": 0.0879, + "step": 1824, + "task_loss": 0.1517452895641327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7519075688303211, + "compression_loss": 0.0, + "distillation_loss": 0.22030451893806458, + "epoch": 1.73, + "learning_rate": 4.283331490549426e-05, + "loss": 0.2232, + "step": 1825, + "task_loss": 0.24884700775146484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7519294257143236, + "compression_loss": 0.0, + "distillation_loss": 0.23822423815727234, + "epoch": 1.73, + "learning_rate": 4.282584581632824e-05, + "loss": 0.2456, + "step": 1826, + "task_loss": 0.3118036389350891 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7519512759750281, + "compression_loss": 0.0, + "distillation_loss": 0.062073417007923126, + "epoch": 1.74, + "learning_rate": 4.281837348901148e-05, + "loss": 0.0686, + "step": 1827, + "task_loss": 0.12697717547416687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7519731196134382, + "compression_loss": 0.0, + "distillation_loss": 0.03941449895501137, + "epoch": 1.74, + "learning_rate": 4.281089792490136e-05, + "loss": 0.0391, + "step": 1828, + "task_loss": 0.036043956875801086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7519949566305574, + "compression_loss": 0.0, + "distillation_loss": 0.14730878174304962, + "epoch": 1.74, + "learning_rate": 4.280341912535585e-05, + "loss": 0.1414, + "step": 1829, + "task_loss": 0.08822986483573914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7520167870273895, + "compression_loss": 0.0, + "distillation_loss": 0.2598916292190552, + "epoch": 1.74, + "learning_rate": 4.2795937091733515e-05, + "loss": 0.2527, + "step": 1830, + "task_loss": 0.18784040212631226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7520386108049383, + "compression_loss": 0.0, + "distillation_loss": 0.0485055074095726, + "epoch": 1.74, + "learning_rate": 4.27884518253935e-05, + "loss": 0.0557, + "step": 1831, + "task_loss": 0.12016290426254272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7520604279642074, + "compression_loss": 0.0, + "distillation_loss": 0.3198782205581665, + "epoch": 1.74, + "learning_rate": 4.278096332769555e-05, + "loss": 0.3177, + "step": 1832, + "task_loss": 0.29813024401664734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7520822385062003, + "compression_loss": 0.0, + "distillation_loss": 0.10080970823764801, + "epoch": 1.74, + "learning_rate": 4.277347159999997e-05, + "loss": 0.1022, + "step": 1833, + "task_loss": 0.11454527825117111 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752104042431921, + "compression_loss": 0.0, + "distillation_loss": 0.06464186310768127, + "epoch": 1.74, + "learning_rate": 4.276597664366767e-05, + "loss": 0.0685, + "step": 1834, + "task_loss": 0.1035253256559372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7521258397423729, + "compression_loss": 0.0, + "distillation_loss": 0.2522902190685272, + "epoch": 1.74, + "learning_rate": 4.2758478460060166e-05, + "loss": 0.2398, + "step": 1835, + "task_loss": 0.12788613140583038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7521476304385599, + "compression_loss": 0.0, + "distillation_loss": 0.15172149240970612, + "epoch": 1.74, + "learning_rate": 4.275097705053951e-05, + "loss": 0.1433, + "step": 1836, + "task_loss": 0.06708873808383942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7521694145214856, + "compression_loss": 0.0, + "distillation_loss": 0.08506453782320023, + "epoch": 1.74, + "learning_rate": 4.2743472416468385e-05, + "loss": 0.0814, + "step": 1837, + "task_loss": 0.048387959599494934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7521911919921536, + "compression_loss": 0.0, + "distillation_loss": 0.07385560125112534, + "epoch": 1.75, + "learning_rate": 4.2735964559210054e-05, + "loss": 0.0693, + "step": 1838, + "task_loss": 0.028625313192605972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7522129628515677, + "compression_loss": 0.0, + "distillation_loss": 0.16923248767852783, + "epoch": 1.75, + "learning_rate": 4.272845348012833e-05, + "loss": 0.1628, + "step": 1839, + "task_loss": 0.10537364333868027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7522347271007315, + "compression_loss": 0.0, + "distillation_loss": 0.3296600878238678, + "epoch": 1.75, + "learning_rate": 4.272093918058766e-05, + "loss": 0.3125, + "step": 1840, + "task_loss": 0.1578628420829773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7522564847406487, + "compression_loss": 0.0, + "distillation_loss": 0.17389464378356934, + "epoch": 1.75, + "learning_rate": 4.271342166195304e-05, + "loss": 0.173, + "step": 1841, + "task_loss": 0.1645306646823883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752278235772323, + "compression_loss": 0.0, + "distillation_loss": 0.08731499314308167, + "epoch": 1.75, + "learning_rate": 4.2705900925590056e-05, + "loss": 0.081, + "step": 1842, + "task_loss": 0.024491865187883377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7522999801967581, + "compression_loss": 0.0, + "distillation_loss": 0.13219568133354187, + "epoch": 1.75, + "learning_rate": 4.269837697286491e-05, + "loss": 0.1257, + "step": 1843, + "task_loss": 0.06739476323127747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7523217180149577, + "compression_loss": 0.0, + "distillation_loss": 0.06735164672136307, + "epoch": 1.75, + "learning_rate": 4.269084980514434e-05, + "loss": 0.0733, + "step": 1844, + "task_loss": 0.12667877972126007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7523434492279254, + "compression_loss": 0.0, + "distillation_loss": 0.1707264930009842, + "epoch": 1.75, + "learning_rate": 4.268331942379571e-05, + "loss": 0.1621, + "step": 1845, + "task_loss": 0.08454715460538864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752365173836665, + "compression_loss": 0.0, + "distillation_loss": 0.15691004693508148, + "epoch": 1.75, + "learning_rate": 4.267578583018694e-05, + "loss": 0.1498, + "step": 1846, + "task_loss": 0.0854022279381752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.75238689184218, + "compression_loss": 0.0, + "distillation_loss": 0.17727318406105042, + "epoch": 1.75, + "learning_rate": 4.2668249025686545e-05, + "loss": 0.1772, + "step": 1847, + "task_loss": 0.1768154799938202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7524086032454743, + "compression_loss": 0.0, + "distillation_loss": 0.14297039806842804, + "epoch": 1.75, + "learning_rate": 4.2660709011663624e-05, + "loss": 0.1431, + "step": 1848, + "task_loss": 0.14434432983398438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7524303080475514, + "compression_loss": 0.0, + "distillation_loss": 0.12875759601593018, + "epoch": 1.76, + "learning_rate": 4.2653165789487864e-05, + "loss": 0.128, + "step": 1849, + "task_loss": 0.12122198939323425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7524520062494151, + "compression_loss": 0.0, + "distillation_loss": 0.048097141087055206, + "epoch": 1.76, + "learning_rate": 4.2645619360529514e-05, + "loss": 0.046, + "step": 1850, + "task_loss": 0.02708207257091999 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752473697852069, + "compression_loss": 0.0, + "distillation_loss": 0.04155917465686798, + "epoch": 1.76, + "learning_rate": 4.2638069726159424e-05, + "loss": 0.0424, + "step": 1851, + "task_loss": 0.04985608160495758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7524953828565168, + "compression_loss": 0.0, + "distillation_loss": 0.06510326266288757, + "epoch": 1.76, + "learning_rate": 4.263051688774902e-05, + "loss": 0.0617, + "step": 1852, + "task_loss": 0.03152014687657356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7525170612637623, + "compression_loss": 0.0, + "distillation_loss": 0.03942890465259552, + "epoch": 1.76, + "learning_rate": 4.262296084667032e-05, + "loss": 0.0426, + "step": 1853, + "task_loss": 0.07090801745653152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752538733074809, + "compression_loss": 0.0, + "distillation_loss": 0.02327614463865757, + "epoch": 1.76, + "learning_rate": 4.2615401604295905e-05, + "loss": 0.0213, + "step": 1854, + "task_loss": 0.0035562757402658463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7525603982906607, + "compression_loss": 0.0, + "distillation_loss": 0.20073172450065613, + "epoch": 1.76, + "learning_rate": 4.260783916199895e-05, + "loss": 0.1898, + "step": 1855, + "task_loss": 0.09144563972949982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752582056912321, + "compression_loss": 0.0, + "distillation_loss": 0.12031463533639908, + "epoch": 1.76, + "learning_rate": 4.260027352115321e-05, + "loss": 0.1236, + "step": 1856, + "task_loss": 0.15320178866386414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7526037089407938, + "compression_loss": 0.0, + "distillation_loss": 0.1642073094844818, + "epoch": 1.76, + "learning_rate": 4.2592704683133035e-05, + "loss": 0.1642, + "step": 1857, + "task_loss": 0.16391614079475403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7526253543770824, + "compression_loss": 0.0, + "distillation_loss": 0.041345108300447464, + "epoch": 1.76, + "learning_rate": 4.258513264931331e-05, + "loss": 0.0553, + "step": 1858, + "task_loss": 0.18059666454792023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7526469932221909, + "compression_loss": 0.0, + "distillation_loss": 0.41769933700561523, + "epoch": 1.77, + "learning_rate": 4.257755742106956e-05, + "loss": 0.4052, + "step": 1859, + "task_loss": 0.29252398014068604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7526686254771225, + "compression_loss": 0.0, + "distillation_loss": 0.32178372144699097, + "epoch": 1.77, + "learning_rate": 4.256997899977784e-05, + "loss": 0.3091, + "step": 1860, + "task_loss": 0.19510038197040558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7526902511428814, + "compression_loss": 0.0, + "distillation_loss": 0.13911914825439453, + "epoch": 1.77, + "learning_rate": 4.2562397386814823e-05, + "loss": 0.133, + "step": 1861, + "task_loss": 0.07751937210559845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7527118702204709, + "compression_loss": 0.0, + "distillation_loss": 0.04612383246421814, + "epoch": 1.77, + "learning_rate": 4.255481258355773e-05, + "loss": 0.0521, + "step": 1862, + "task_loss": 0.10636930912733078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7527334827108949, + "compression_loss": 0.0, + "distillation_loss": 0.1954609900712967, + "epoch": 1.77, + "learning_rate": 4.254722459138441e-05, + "loss": 0.1994, + "step": 1863, + "task_loss": 0.23512773215770721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752755088615157, + "compression_loss": 0.0, + "distillation_loss": 0.29851043224334717, + "epoch": 1.77, + "learning_rate": 4.253963341167321e-05, + "loss": 0.2852, + "step": 1864, + "task_loss": 0.16501733660697937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7527766879342609, + "compression_loss": 0.0, + "distillation_loss": 0.10969699919223785, + "epoch": 1.77, + "learning_rate": 4.253203904580314e-05, + "loss": 0.1038, + "step": 1865, + "task_loss": 0.050924863666296005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7527982806692103, + "compression_loss": 0.0, + "distillation_loss": 0.1796002984046936, + "epoch": 1.77, + "learning_rate": 4.252444149515374e-05, + "loss": 0.177, + "step": 1866, + "task_loss": 0.15360799431800842 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7528198668210088, + "compression_loss": 0.0, + "distillation_loss": 0.14553160965442657, + "epoch": 1.77, + "learning_rate": 4.251684076110514e-05, + "loss": 0.1432, + "step": 1867, + "task_loss": 0.122085340321064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7528414463906601, + "compression_loss": 0.0, + "distillation_loss": 0.2174375355243683, + "epoch": 1.77, + "learning_rate": 4.250923684503806e-05, + "loss": 0.207, + "step": 1868, + "task_loss": 0.11338604241609573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7528630193791679, + "compression_loss": 0.0, + "distillation_loss": 0.29796043038368225, + "epoch": 1.77, + "learning_rate": 4.2501629748333774e-05, + "loss": 0.2902, + "step": 1869, + "task_loss": 0.22043783962726593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7528845857875359, + "compression_loss": 0.0, + "distillation_loss": 0.12145286798477173, + "epoch": 1.78, + "learning_rate": 4.249401947237417e-05, + "loss": 0.1337, + "step": 1870, + "task_loss": 0.24400418996810913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7529061456167679, + "compression_loss": 0.0, + "distillation_loss": 0.1129135936498642, + "epoch": 1.78, + "learning_rate": 4.248640601854166e-05, + "loss": 0.1107, + "step": 1871, + "task_loss": 0.09064217656850815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7529276988678674, + "compression_loss": 0.0, + "distillation_loss": 0.2056802213191986, + "epoch": 1.78, + "learning_rate": 4.247878938821929e-05, + "loss": 0.2075, + "step": 1872, + "task_loss": 0.22419533133506775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7529492455418382, + "compression_loss": 0.0, + "distillation_loss": 0.14987275004386902, + "epoch": 1.78, + "learning_rate": 4.247116958279065e-05, + "loss": 0.1442, + "step": 1873, + "task_loss": 0.09318174421787262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7529707856396838, + "compression_loss": 0.0, + "distillation_loss": 0.09295307099819183, + "epoch": 1.78, + "learning_rate": 4.246354660363991e-05, + "loss": 0.0863, + "step": 1874, + "task_loss": 0.026577245444059372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.752992319162408, + "compression_loss": 0.0, + "distillation_loss": 0.10765902698040009, + "epoch": 1.78, + "learning_rate": 4.245592045215182e-05, + "loss": 0.1006, + "step": 1875, + "task_loss": 0.03689169883728027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7530138461110146, + "compression_loss": 0.0, + "distillation_loss": 0.08064229786396027, + "epoch": 1.78, + "learning_rate": 4.244829112971172e-05, + "loss": 0.0793, + "step": 1876, + "task_loss": 0.06695515662431717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7530353664865072, + "compression_loss": 0.0, + "distillation_loss": 0.07568307220935822, + "epoch": 1.78, + "learning_rate": 4.24406586377055e-05, + "loss": 0.0758, + "step": 1877, + "task_loss": 0.07722204923629761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7530568802898894, + "compression_loss": 0.0, + "distillation_loss": 0.16826829314231873, + "epoch": 1.78, + "learning_rate": 4.2433022977519645e-05, + "loss": 0.1746, + "step": 1878, + "task_loss": 0.23110389709472656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7530783875221648, + "compression_loss": 0.0, + "distillation_loss": 0.22350825369358063, + "epoch": 1.78, + "learning_rate": 4.2425384150541206e-05, + "loss": 0.2159, + "step": 1879, + "task_loss": 0.14728033542633057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7530998881843374, + "compression_loss": 0.0, + "distillation_loss": 0.13128520548343658, + "epoch": 1.79, + "learning_rate": 4.2417742158157816e-05, + "loss": 0.1253, + "step": 1880, + "task_loss": 0.07127057015895844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7531213822774107, + "compression_loss": 0.0, + "distillation_loss": 0.062282584607601166, + "epoch": 1.79, + "learning_rate": 4.2410097001757676e-05, + "loss": 0.0574, + "step": 1881, + "task_loss": 0.013064134865999222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7531428698023883, + "compression_loss": 0.0, + "distillation_loss": 0.014240816235542297, + "epoch": 1.79, + "learning_rate": 4.2402448682729566e-05, + "loss": 0.0133, + "step": 1882, + "task_loss": 0.005121858790516853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.753164350760274, + "compression_loss": 0.0, + "distillation_loss": 0.08013554662466049, + "epoch": 1.79, + "learning_rate": 4.2394797202462844e-05, + "loss": 0.074, + "step": 1883, + "task_loss": 0.019022256135940552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7531858251520714, + "compression_loss": 0.0, + "distillation_loss": 0.3540295362472534, + "epoch": 1.79, + "learning_rate": 4.238714256234744e-05, + "loss": 0.3391, + "step": 1884, + "task_loss": 0.20519450306892395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7532072929787843, + "compression_loss": 0.0, + "distillation_loss": 0.21610870957374573, + "epoch": 1.79, + "learning_rate": 4.237948476377385e-05, + "loss": 0.2104, + "step": 1885, + "task_loss": 0.15874995291233063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7532287542414162, + "compression_loss": 0.0, + "distillation_loss": 0.12253059446811676, + "epoch": 1.79, + "learning_rate": 4.237182380813315e-05, + "loss": 0.1246, + "step": 1886, + "task_loss": 0.14349707961082458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.753250208940971, + "compression_loss": 0.0, + "distillation_loss": 0.02185000665485859, + "epoch": 1.79, + "learning_rate": 4.236415969681699e-05, + "loss": 0.0202, + "step": 1887, + "task_loss": 0.00525074265897274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7532716570784523, + "compression_loss": 0.0, + "distillation_loss": 0.1840369701385498, + "epoch": 1.79, + "learning_rate": 4.23564924312176e-05, + "loss": 0.1743, + "step": 1888, + "task_loss": 0.08696576207876205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7532930986548637, + "compression_loss": 0.0, + "distillation_loss": 0.23074445128440857, + "epoch": 1.79, + "learning_rate": 4.2348822012727765e-05, + "loss": 0.2274, + "step": 1889, + "task_loss": 0.19752009212970734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7533145336712089, + "compression_loss": 0.0, + "distillation_loss": 0.04407363384962082, + "epoch": 1.79, + "learning_rate": 4.234114844274086e-05, + "loss": 0.0408, + "step": 1890, + "task_loss": 0.01094069704413414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7533359621284917, + "compression_loss": 0.0, + "distillation_loss": 0.11152929067611694, + "epoch": 1.8, + "learning_rate": 4.2333471722650826e-05, + "loss": 0.1117, + "step": 1891, + "task_loss": 0.11322666704654694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7533573840277157, + "compression_loss": 0.0, + "distillation_loss": 0.09612832963466644, + "epoch": 1.8, + "learning_rate": 4.232579185385217e-05, + "loss": 0.0905, + "step": 1892, + "task_loss": 0.03963814303278923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7533787993698846, + "compression_loss": 0.0, + "distillation_loss": 0.3889869451522827, + "epoch": 1.8, + "learning_rate": 4.231810883773999e-05, + "loss": 0.3714, + "step": 1893, + "task_loss": 0.2126784771680832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.753400208156002, + "compression_loss": 0.0, + "distillation_loss": 0.02580692060291767, + "epoch": 1.8, + "learning_rate": 4.231042267570993e-05, + "loss": 0.0241, + "step": 1894, + "task_loss": 0.008529577404260635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7534216103870717, + "compression_loss": 0.0, + "distillation_loss": 0.03107302263379097, + "epoch": 1.8, + "learning_rate": 4.230273336915822e-05, + "loss": 0.0289, + "step": 1895, + "task_loss": 0.0094615388661623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7534430060640973, + "compression_loss": 0.0, + "distillation_loss": 0.03097601607441902, + "epoch": 1.8, + "learning_rate": 4.2295040919481664e-05, + "loss": 0.0287, + "step": 1896, + "task_loss": 0.007949141785502434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7534643951880825, + "compression_loss": 0.0, + "distillation_loss": 0.05745317414402962, + "epoch": 1.8, + "learning_rate": 4.228734532807763e-05, + "loss": 0.0657, + "step": 1897, + "task_loss": 0.13991054892539978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.753485777760031, + "compression_loss": 0.0, + "distillation_loss": 0.028573032468557358, + "epoch": 1.8, + "learning_rate": 4.2279646596344067e-05, + "loss": 0.0357, + "step": 1898, + "task_loss": 0.09934914112091064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7535071537809466, + "compression_loss": 0.0, + "distillation_loss": 0.18852916359901428, + "epoch": 1.8, + "learning_rate": 4.227194472567948e-05, + "loss": 0.1943, + "step": 1899, + "task_loss": 0.24627923965454102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7535285232518327, + "compression_loss": 0.0, + "distillation_loss": 0.24925187230110168, + "epoch": 1.8, + "learning_rate": 4.2264239717482945e-05, + "loss": 0.2396, + "step": 1900, + "task_loss": 0.15310557186603546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7535498861736932, + "compression_loss": 0.0, + "distillation_loss": 0.0920097678899765, + "epoch": 1.81, + "learning_rate": 4.225653157315412e-05, + "loss": 0.0899, + "step": 1901, + "task_loss": 0.07052158564329147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7535712425475317, + "compression_loss": 0.0, + "distillation_loss": 0.13238155841827393, + "epoch": 1.81, + "learning_rate": 4.224882029409323e-05, + "loss": 0.1305, + "step": 1902, + "task_loss": 0.1139896884560585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7535925923743519, + "compression_loss": 0.0, + "distillation_loss": 0.044050946831703186, + "epoch": 1.81, + "learning_rate": 4.224110588170106e-05, + "loss": 0.0424, + "step": 1903, + "task_loss": 0.027057217434048653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7536139356551576, + "compression_loss": 0.0, + "distillation_loss": 0.1628579944372177, + "epoch": 1.81, + "learning_rate": 4.223338833737898e-05, + "loss": 0.154, + "step": 1904, + "task_loss": 0.07387073338031769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7536352723909523, + "compression_loss": 0.0, + "distillation_loss": 0.1415494978427887, + "epoch": 1.81, + "learning_rate": 4.22256676625289e-05, + "loss": 0.1376, + "step": 1905, + "task_loss": 0.10222296416759491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7536566025827397, + "compression_loss": 0.0, + "distillation_loss": 0.016083184629678726, + "epoch": 1.81, + "learning_rate": 4.221794385855334e-05, + "loss": 0.0149, + "step": 1906, + "task_loss": 0.003827514126896858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7536779262315236, + "compression_loss": 0.0, + "distillation_loss": 0.02969953790307045, + "epoch": 1.81, + "learning_rate": 4.221021692685534e-05, + "loss": 0.0271, + "step": 1907, + "task_loss": 0.0035870037972927094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7536992433383076, + "compression_loss": 0.0, + "distillation_loss": 0.13226908445358276, + "epoch": 1.81, + "learning_rate": 4.220248686883857e-05, + "loss": 0.1333, + "step": 1908, + "task_loss": 0.14230315387248993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7537205539040954, + "compression_loss": 0.0, + "distillation_loss": 0.09493161737918854, + "epoch": 1.81, + "learning_rate": 4.21947536859072e-05, + "loss": 0.0904, + "step": 1909, + "task_loss": 0.04967789724469185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7537418579298907, + "compression_loss": 0.0, + "distillation_loss": 0.2932942509651184, + "epoch": 1.81, + "learning_rate": 4.218701737946601e-05, + "loss": 0.2855, + "step": 1910, + "task_loss": 0.21534579992294312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7537631554166971, + "compression_loss": 0.0, + "distillation_loss": 0.2947857081890106, + "epoch": 1.81, + "learning_rate": 4.217927795092034e-05, + "loss": 0.2831, + "step": 1911, + "task_loss": 0.17762699723243713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7537844463655184, + "compression_loss": 0.0, + "distillation_loss": 0.2603399455547333, + "epoch": 1.82, + "learning_rate": 4.21715354016761e-05, + "loss": 0.2464, + "step": 1912, + "task_loss": 0.12051122635602951 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7538057307773582, + "compression_loss": 0.0, + "distillation_loss": 0.19969797134399414, + "epoch": 1.82, + "learning_rate": 4.216378973313976e-05, + "loss": 0.1898, + "step": 1913, + "task_loss": 0.10046619176864624 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7538270086532203, + "compression_loss": 0.0, + "distillation_loss": 0.08806608617305756, + "epoch": 1.82, + "learning_rate": 4.215604094671835e-05, + "loss": 0.0839, + "step": 1914, + "task_loss": 0.04651288688182831 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7538482799941082, + "compression_loss": 0.0, + "distillation_loss": 0.09663443267345428, + "epoch": 1.82, + "learning_rate": 4.214828904381947e-05, + "loss": 0.1037, + "step": 1915, + "task_loss": 0.16719813644886017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7538695448010257, + "compression_loss": 0.0, + "distillation_loss": 0.20862799882888794, + "epoch": 1.82, + "learning_rate": 4.21405340258513e-05, + "loss": 0.2019, + "step": 1916, + "task_loss": 0.14169104397296906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7538908030749765, + "compression_loss": 0.0, + "distillation_loss": 0.11301036179065704, + "epoch": 1.82, + "learning_rate": 4.213277589422258e-05, + "loss": 0.117, + "step": 1917, + "task_loss": 0.1530938744544983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7539120548169642, + "compression_loss": 0.0, + "distillation_loss": 0.3912833333015442, + "epoch": 1.82, + "learning_rate": 4.21250146503426e-05, + "loss": 0.388, + "step": 1918, + "task_loss": 0.358115017414093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7539333000279925, + "compression_loss": 0.0, + "distillation_loss": 0.057676736265420914, + "epoch": 1.82, + "learning_rate": 4.2117250295621235e-05, + "loss": 0.0556, + "step": 1919, + "task_loss": 0.03697335347533226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7539545387090651, + "compression_loss": 0.0, + "distillation_loss": 0.08969536423683167, + "epoch": 1.82, + "learning_rate": 4.210948283146892e-05, + "loss": 0.0894, + "step": 1920, + "task_loss": 0.08631106466054916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7539757708611857, + "compression_loss": 0.0, + "distillation_loss": 0.17513608932495117, + "epoch": 1.82, + "learning_rate": 4.210171225929664e-05, + "loss": 0.1674, + "step": 1921, + "task_loss": 0.09771417081356049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.753996996485358, + "compression_loss": 0.0, + "distillation_loss": 0.1546175479888916, + "epoch": 1.83, + "learning_rate": 4.209393858051598e-05, + "loss": 0.1516, + "step": 1922, + "task_loss": 0.12429603934288025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7540182155825856, + "compression_loss": 0.0, + "distillation_loss": 0.28697076439857483, + "epoch": 1.83, + "learning_rate": 4.208616179653903e-05, + "loss": 0.2767, + "step": 1923, + "task_loss": 0.18397051095962524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7540394281538723, + "compression_loss": 0.0, + "distillation_loss": 0.04862765967845917, + "epoch": 1.83, + "learning_rate": 4.207838190877852e-05, + "loss": 0.0495, + "step": 1924, + "task_loss": 0.05719340965151787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7540606342002216, + "compression_loss": 0.0, + "distillation_loss": 0.2369440346956253, + "epoch": 1.83, + "learning_rate": 4.2070598918647683e-05, + "loss": 0.2375, + "step": 1925, + "task_loss": 0.2421676218509674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7540818337226374, + "compression_loss": 0.0, + "distillation_loss": 0.10661203414201736, + "epoch": 1.83, + "learning_rate": 4.206281282756034e-05, + "loss": 0.101, + "step": 1926, + "task_loss": 0.050882913172245026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7541030267221233, + "compression_loss": 0.0, + "distillation_loss": 0.09290862083435059, + "epoch": 1.83, + "learning_rate": 4.205502363693087e-05, + "loss": 0.0886, + "step": 1927, + "task_loss": 0.04993749409914017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7541242131996828, + "compression_loss": 0.0, + "distillation_loss": 0.13861539959907532, + "epoch": 1.83, + "learning_rate": 4.204723134817422e-05, + "loss": 0.1414, + "step": 1928, + "task_loss": 0.16668032109737396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7541453931563199, + "compression_loss": 0.0, + "distillation_loss": 0.1590609848499298, + "epoch": 1.83, + "learning_rate": 4.2039435962705886e-05, + "loss": 0.1669, + "step": 1929, + "task_loss": 0.23708070814609528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.754166566593038, + "compression_loss": 0.0, + "distillation_loss": 0.035407066345214844, + "epoch": 1.83, + "learning_rate": 4.2031637481941954e-05, + "loss": 0.0332, + "step": 1930, + "task_loss": 0.013283960521221161 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.754187733510841, + "compression_loss": 0.0, + "distillation_loss": 0.1811191439628601, + "epoch": 1.83, + "learning_rate": 4.202383590729905e-05, + "loss": 0.1727, + "step": 1931, + "task_loss": 0.09702938795089722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7542088939107325, + "compression_loss": 0.0, + "distillation_loss": 0.10961516201496124, + "epoch": 1.83, + "learning_rate": 4.201603124019436e-05, + "loss": 0.1087, + "step": 1932, + "task_loss": 0.10052899271249771 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.754230047793716, + "compression_loss": 0.0, + "distillation_loss": 0.249653160572052, + "epoch": 1.84, + "learning_rate": 4.200822348204565e-05, + "loss": 0.2377, + "step": 1933, + "task_loss": 0.12971001863479614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7542511951607956, + "compression_loss": 0.0, + "distillation_loss": 0.12614881992340088, + "epoch": 1.84, + "learning_rate": 4.200041263427123e-05, + "loss": 0.1307, + "step": 1934, + "task_loss": 0.17151018977165222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7542723360129746, + "compression_loss": 0.0, + "distillation_loss": 0.18562328815460205, + "epoch": 1.84, + "learning_rate": 4.199259869828998e-05, + "loss": 0.1845, + "step": 1935, + "task_loss": 0.1747492104768753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7542934703512568, + "compression_loss": 0.0, + "distillation_loss": 0.07814265787601471, + "epoch": 1.84, + "learning_rate": 4.1984781675521345e-05, + "loss": 0.0725, + "step": 1936, + "task_loss": 0.021998286247253418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7543145981766459, + "compression_loss": 0.0, + "distillation_loss": 0.06505021452903748, + "epoch": 1.84, + "learning_rate": 4.1976961567385306e-05, + "loss": 0.0609, + "step": 1937, + "task_loss": 0.02330428548157215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7543357194901457, + "compression_loss": 0.0, + "distillation_loss": 0.043614309281110764, + "epoch": 1.84, + "learning_rate": 4.1969138375302445e-05, + "loss": 0.0402, + "step": 1938, + "task_loss": 0.009535277262330055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7543568342927597, + "compression_loss": 0.0, + "distillation_loss": 0.12613525986671448, + "epoch": 1.84, + "learning_rate": 4.1961312100693874e-05, + "loss": 0.1151, + "step": 1939, + "task_loss": 0.016281738877296448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7543779425854915, + "compression_loss": 0.0, + "distillation_loss": 0.23481710255146027, + "epoch": 1.84, + "learning_rate": 4.1953482744981274e-05, + "loss": 0.2348, + "step": 1940, + "task_loss": 0.23426470160484314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7543990443693451, + "compression_loss": 0.0, + "distillation_loss": 0.17542167007923126, + "epoch": 1.84, + "learning_rate": 4.194565030958688e-05, + "loss": 0.1797, + "step": 1941, + "task_loss": 0.2181302309036255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.754420139645324, + "compression_loss": 0.0, + "distillation_loss": 0.04640250653028488, + "epoch": 1.84, + "learning_rate": 4.19378147959335e-05, + "loss": 0.0431, + "step": 1942, + "task_loss": 0.013633305206894875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7544412284144317, + "compression_loss": 0.0, + "distillation_loss": 0.09803412854671478, + "epoch": 1.85, + "learning_rate": 4.192997620544449e-05, + "loss": 0.099, + "step": 1943, + "task_loss": 0.10792693495750427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7544623106776723, + "compression_loss": 0.0, + "distillation_loss": 0.1254318654537201, + "epoch": 1.85, + "learning_rate": 4.192213453954377e-05, + "loss": 0.1264, + "step": 1944, + "task_loss": 0.13558337092399597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7544833864360492, + "compression_loss": 0.0, + "distillation_loss": 0.028305238112807274, + "epoch": 1.85, + "learning_rate": 4.19142897996558e-05, + "loss": 0.026, + "step": 1945, + "task_loss": 0.005378095433115959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7545044556905661, + "compression_loss": 0.0, + "distillation_loss": 0.08470214158296585, + "epoch": 1.85, + "learning_rate": 4.190644198720563e-05, + "loss": 0.0819, + "step": 1946, + "task_loss": 0.05680760368704796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7545255184422267, + "compression_loss": 0.0, + "distillation_loss": 0.2368120402097702, + "epoch": 1.85, + "learning_rate": 4.189859110361886e-05, + "loss": 0.231, + "step": 1947, + "task_loss": 0.17879217863082886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7545465746920347, + "compression_loss": 0.0, + "distillation_loss": 0.12444418668746948, + "epoch": 1.85, + "learning_rate": 4.189073715032163e-05, + "loss": 0.1211, + "step": 1948, + "task_loss": 0.0914890468120575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7545676244409938, + "compression_loss": 0.0, + "distillation_loss": 0.1032329723238945, + "epoch": 1.85, + "learning_rate": 4.188288012874065e-05, + "loss": 0.1, + "step": 1949, + "task_loss": 0.07098895311355591 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7545886676901077, + "compression_loss": 0.0, + "distillation_loss": 0.6745878458023071, + "epoch": 1.85, + "learning_rate": 4.187502004030318e-05, + "loss": 0.6498, + "step": 1950, + "task_loss": 0.4266480803489685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.75460970444038, + "compression_loss": 0.0, + "distillation_loss": 0.2606114149093628, + "epoch": 1.85, + "learning_rate": 4.186715688643705e-05, + "loss": 0.2448, + "step": 1951, + "task_loss": 0.10222074389457703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7546307346928145, + "compression_loss": 0.0, + "distillation_loss": 0.07269393652677536, + "epoch": 1.85, + "learning_rate": 4.185929066857064e-05, + "loss": 0.0787, + "step": 1952, + "task_loss": 0.1328985095024109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7546517584484147, + "compression_loss": 0.0, + "distillation_loss": 0.03857032582163811, + "epoch": 1.85, + "learning_rate": 4.1851421388132886e-05, + "loss": 0.0352, + "step": 1953, + "task_loss": 0.004427826032042503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7546727757081845, + "compression_loss": 0.0, + "distillation_loss": 0.025319568812847137, + "epoch": 1.86, + "learning_rate": 4.1843549046553284e-05, + "loss": 0.0232, + "step": 1954, + "task_loss": 0.004016607999801636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7546937864731275, + "compression_loss": 0.0, + "distillation_loss": 0.14767573773860931, + "epoch": 1.86, + "learning_rate": 4.183567364526186e-05, + "loss": 0.1589, + "step": 1955, + "task_loss": 0.2602040767669678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7547147907442472, + "compression_loss": 0.0, + "distillation_loss": 0.19196276366710663, + "epoch": 1.86, + "learning_rate": 4.182779518568926e-05, + "loss": 0.1933, + "step": 1956, + "task_loss": 0.2052653729915619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7547357885225475, + "compression_loss": 0.0, + "distillation_loss": 0.168940469622612, + "epoch": 1.86, + "learning_rate": 4.181991366926661e-05, + "loss": 0.1697, + "step": 1957, + "task_loss": 0.1767549216747284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7547567798090321, + "compression_loss": 0.0, + "distillation_loss": 0.06519413739442825, + "epoch": 1.86, + "learning_rate": 4.181202909742564e-05, + "loss": 0.0633, + "step": 1958, + "task_loss": 0.04639093205332756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7547777646047046, + "compression_loss": 0.0, + "distillation_loss": 0.23005223274230957, + "epoch": 1.86, + "learning_rate": 4.1804141471598604e-05, + "loss": 0.22, + "step": 1959, + "task_loss": 0.1295807808637619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7547987429105686, + "compression_loss": 0.0, + "distillation_loss": 0.15140117704868317, + "epoch": 1.86, + "learning_rate": 4.179625079321836e-05, + "loss": 0.1443, + "step": 1960, + "task_loss": 0.08072268962860107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.754819714727628, + "compression_loss": 0.0, + "distillation_loss": 0.15924590826034546, + "epoch": 1.86, + "learning_rate": 4.1788357063718254e-05, + "loss": 0.1564, + "step": 1961, + "task_loss": 0.1309977024793625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7548406800568862, + "compression_loss": 0.0, + "distillation_loss": 0.09538164734840393, + "epoch": 1.86, + "learning_rate": 4.178046028453224e-05, + "loss": 0.091, + "step": 1962, + "task_loss": 0.05167919024825096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7548616388993471, + "compression_loss": 0.0, + "distillation_loss": 0.12580883502960205, + "epoch": 1.86, + "learning_rate": 4.1772560457094795e-05, + "loss": 0.123, + "step": 1963, + "task_loss": 0.09781771153211594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7548825912560143, + "compression_loss": 0.0, + "distillation_loss": 0.06123030185699463, + "epoch": 1.87, + "learning_rate": 4.1764657582840965e-05, + "loss": 0.0621, + "step": 1964, + "task_loss": 0.069994255900383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7549035371278915, + "compression_loss": 0.0, + "distillation_loss": 0.09012752026319504, + "epoch": 1.87, + "learning_rate": 4.175675166320635e-05, + "loss": 0.0856, + "step": 1965, + "task_loss": 0.045101843774318695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7549244765159824, + "compression_loss": 0.0, + "distillation_loss": 0.18190321326255798, + "epoch": 1.87, + "learning_rate": 4.1748842699627094e-05, + "loss": 0.178, + "step": 1966, + "task_loss": 0.14312417805194855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7549454094212906, + "compression_loss": 0.0, + "distillation_loss": 0.2552461326122284, + "epoch": 1.87, + "learning_rate": 4.17409306935399e-05, + "loss": 0.2553, + "step": 1967, + "task_loss": 0.25603681802749634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.75496633584482, + "compression_loss": 0.0, + "distillation_loss": 0.05597781017422676, + "epoch": 1.87, + "learning_rate": 4.173301564638201e-05, + "loss": 0.0518, + "step": 1968, + "task_loss": 0.014608925208449364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.754987255787574, + "compression_loss": 0.0, + "distillation_loss": 0.24270398914813995, + "epoch": 1.87, + "learning_rate": 4.1725097559591256e-05, + "loss": 0.2384, + "step": 1969, + "task_loss": 0.19955337047576904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7550081692505564, + "compression_loss": 0.0, + "distillation_loss": 0.07123062014579773, + "epoch": 1.87, + "learning_rate": 4.1717176434605967e-05, + "loss": 0.0911, + "step": 1970, + "task_loss": 0.2696712017059326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7550290762347709, + "compression_loss": 0.0, + "distillation_loss": 0.07343777269124985, + "epoch": 1.87, + "learning_rate": 4.170925227286508e-05, + "loss": 0.0777, + "step": 1971, + "task_loss": 0.11573462188243866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7550499767412212, + "compression_loss": 0.0, + "distillation_loss": 0.45374932885169983, + "epoch": 1.87, + "learning_rate": 4.170132507580803e-05, + "loss": 0.4348, + "step": 1972, + "task_loss": 0.26441484689712524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.755070870770911, + "compression_loss": 0.0, + "distillation_loss": 0.07927072793245316, + "epoch": 1.87, + "learning_rate": 4.1693394844874856e-05, + "loss": 0.0938, + "step": 1973, + "task_loss": 0.22447620332241058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7550917583248438, + "compression_loss": 0.0, + "distillation_loss": 0.10215029865503311, + "epoch": 1.87, + "learning_rate": 4.1685461581506115e-05, + "loss": 0.1084, + "step": 1974, + "task_loss": 0.16435641050338745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7551126394040235, + "compression_loss": 0.0, + "distillation_loss": 0.1036095917224884, + "epoch": 1.88, + "learning_rate": 4.167752528714291e-05, + "loss": 0.096, + "step": 1975, + "task_loss": 0.027509452775120735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7551335140094537, + "compression_loss": 0.0, + "distillation_loss": 0.19809332489967346, + "epoch": 1.88, + "learning_rate": 4.166958596322692e-05, + "loss": 0.1908, + "step": 1976, + "task_loss": 0.1248994842171669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.755154382142138, + "compression_loss": 0.0, + "distillation_loss": 0.10628242045640945, + "epoch": 1.88, + "learning_rate": 4.1661643611200366e-05, + "loss": 0.1052, + "step": 1977, + "task_loss": 0.09579187631607056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7551752438030803, + "compression_loss": 0.0, + "distillation_loss": 0.3265897333621979, + "epoch": 1.88, + "learning_rate": 4.1653698232506e-05, + "loss": 0.311, + "step": 1978, + "task_loss": 0.17026562988758087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7551960989932841, + "compression_loss": 0.0, + "distillation_loss": 0.44924837350845337, + "epoch": 1.88, + "learning_rate": 4.1645749828587145e-05, + "loss": 0.4407, + "step": 1979, + "task_loss": 0.36339446902275085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7552169477137531, + "compression_loss": 0.0, + "distillation_loss": 0.2017005980014801, + "epoch": 1.88, + "learning_rate": 4.1637798400887674e-05, + "loss": 0.1914, + "step": 1980, + "task_loss": 0.09869912266731262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7552377899654911, + "compression_loss": 0.0, + "distillation_loss": 0.24423138797283173, + "epoch": 1.88, + "learning_rate": 4.162984395085198e-05, + "loss": 0.2326, + "step": 1981, + "task_loss": 0.12757530808448792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7552586257495015, + "compression_loss": 0.0, + "distillation_loss": 0.06164749711751938, + "epoch": 1.88, + "learning_rate": 4.162188647992506e-05, + "loss": 0.0573, + "step": 1982, + "task_loss": 0.018155789002776146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7552794550667884, + "compression_loss": 0.0, + "distillation_loss": 0.3318878710269928, + "epoch": 1.88, + "learning_rate": 4.161392598955239e-05, + "loss": 0.3315, + "step": 1983, + "task_loss": 0.328228622674942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7553002779183551, + "compression_loss": 0.0, + "distillation_loss": 0.13821963965892792, + "epoch": 1.88, + "learning_rate": 4.160596248118007e-05, + "loss": 0.1504, + "step": 1984, + "task_loss": 0.25973179936408997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7553210943052054, + "compression_loss": 0.0, + "distillation_loss": 0.11968272924423218, + "epoch": 1.89, + "learning_rate": 4.159799595625468e-05, + "loss": 0.1284, + "step": 1985, + "task_loss": 0.20697957277297974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7553419042283431, + "compression_loss": 0.0, + "distillation_loss": 0.14230716228485107, + "epoch": 1.89, + "learning_rate": 4.159002641622338e-05, + "loss": 0.1497, + "step": 1986, + "task_loss": 0.21655035018920898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7553627076887719, + "compression_loss": 0.0, + "distillation_loss": 0.053881511092185974, + "epoch": 1.89, + "learning_rate": 4.1582053862533895e-05, + "loss": 0.0549, + "step": 1987, + "task_loss": 0.06384479999542236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7553835046874953, + "compression_loss": 0.0, + "distillation_loss": 0.16238471865653992, + "epoch": 1.89, + "learning_rate": 4.157407829663446e-05, + "loss": 0.1679, + "step": 1988, + "task_loss": 0.21752187609672546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7554042952255171, + "compression_loss": 0.0, + "distillation_loss": 0.08790767192840576, + "epoch": 1.89, + "learning_rate": 4.1566099719973884e-05, + "loss": 0.084, + "step": 1989, + "task_loss": 0.04870061203837395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7554250793038408, + "compression_loss": 0.0, + "distillation_loss": 0.12433084100484848, + "epoch": 1.89, + "learning_rate": 4.1558118134001514e-05, + "loss": 0.1188, + "step": 1990, + "task_loss": 0.06881560385227203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7554458569234704, + "compression_loss": 0.0, + "distillation_loss": 0.19472339749336243, + "epoch": 1.89, + "learning_rate": 4.155013354016723e-05, + "loss": 0.1847, + "step": 1991, + "task_loss": 0.0943620428442955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7554666280854094, + "compression_loss": 0.0, + "distillation_loss": 0.12036952376365662, + "epoch": 1.89, + "learning_rate": 4.154214593992149e-05, + "loss": 0.1135, + "step": 1992, + "task_loss": 0.05154300481081009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7554873927906615, + "compression_loss": 0.0, + "distillation_loss": 0.16188105940818787, + "epoch": 1.89, + "learning_rate": 4.1534155334715264e-05, + "loss": 0.1657, + "step": 1993, + "task_loss": 0.20030921697616577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7555081510402303, + "compression_loss": 0.0, + "distillation_loss": 0.03598017245531082, + "epoch": 1.89, + "learning_rate": 4.15261617260001e-05, + "loss": 0.0422, + "step": 1994, + "task_loss": 0.09828056395053864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7555289028351195, + "compression_loss": 0.0, + "distillation_loss": 0.07038508355617523, + "epoch": 1.89, + "learning_rate": 4.151816511522807e-05, + "loss": 0.0708, + "step": 1995, + "task_loss": 0.07408405095338821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.755549648176333, + "compression_loss": 0.0, + "distillation_loss": 0.0785357877612114, + "epoch": 1.9, + "learning_rate": 4.151016550385179e-05, + "loss": 0.0895, + "step": 1996, + "task_loss": 0.18811197578907013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7555703870648742, + "compression_loss": 0.0, + "distillation_loss": 0.06988924741744995, + "epoch": 1.9, + "learning_rate": 4.150216289332443e-05, + "loss": 0.0647, + "step": 1997, + "task_loss": 0.018136270344257355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7555911195017471, + "compression_loss": 0.0, + "distillation_loss": 0.11623725295066833, + "epoch": 1.9, + "learning_rate": 4.149415728509971e-05, + "loss": 0.1145, + "step": 1998, + "task_loss": 0.09836913645267487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.755611845487955, + "compression_loss": 0.0, + "distillation_loss": 0.0843459814786911, + "epoch": 1.9, + "learning_rate": 4.1486148680631875e-05, + "loss": 0.0788, + "step": 1999, + "task_loss": 0.028761208057403564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7556325650245018, + "compression_loss": 0.0, + "distillation_loss": 0.16870735585689545, + "epoch": 1.9, + "learning_rate": 4.147813708137574e-05, + "loss": 0.1591, + "step": 2000, + "task_loss": 0.07283841073513031 + }, + { + "epoch": 1.9, + "eval_accuracy": 0.9013761467889908, + "eval_loss": 0.3940832018852234, + "eval_runtime": 18.3803, + "eval_samples_per_second": 47.442, + "eval_steps_per_second": 5.93, + "step": 2000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7556532781123911, + "compression_loss": 0.0, + "distillation_loss": 0.2963669002056122, + "epoch": 1.9, + "learning_rate": 4.1470122488786645e-05, + "loss": 0.2864, + "step": 2001, + "task_loss": 0.1968749761581421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7556739847526267, + "compression_loss": 0.0, + "distillation_loss": 0.05983327701687813, + "epoch": 1.9, + "learning_rate": 4.146210490432048e-05, + "loss": 0.0605, + "step": 2002, + "task_loss": 0.06697780638933182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7556946849462123, + "compression_loss": 0.0, + "distillation_loss": 0.07671034336090088, + "epoch": 1.9, + "learning_rate": 4.1454084329433674e-05, + "loss": 0.0927, + "step": 2003, + "task_loss": 0.23632298409938812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7557153786941514, + "compression_loss": 0.0, + "distillation_loss": 0.14522545039653778, + "epoch": 1.9, + "learning_rate": 4.144606076558321e-05, + "loss": 0.1386, + "step": 2004, + "task_loss": 0.07935189455747604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7557360659974478, + "compression_loss": 0.0, + "distillation_loss": 0.25535622239112854, + "epoch": 1.9, + "learning_rate": 4.14380342142266e-05, + "loss": 0.2454, + "step": 2005, + "task_loss": 0.1559627205133438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7557567468571051, + "compression_loss": 0.0, + "distillation_loss": 0.09220820665359497, + "epoch": 1.91, + "learning_rate": 4.14300046768219e-05, + "loss": 0.0889, + "step": 2006, + "task_loss": 0.05925717204809189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7557774212741272, + "compression_loss": 0.0, + "distillation_loss": 0.21450912952423096, + "epoch": 1.91, + "learning_rate": 4.1421972154827724e-05, + "loss": 0.2048, + "step": 2007, + "task_loss": 0.11772306263446808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7557980892495175, + "compression_loss": 0.0, + "distillation_loss": 0.16318204998970032, + "epoch": 1.91, + "learning_rate": 4.141393664970323e-05, + "loss": 0.1539, + "step": 2008, + "task_loss": 0.0701281875371933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7558187507842798, + "compression_loss": 0.0, + "distillation_loss": 0.0389268696308136, + "epoch": 1.91, + "learning_rate": 4.140589816290808e-05, + "loss": 0.0364, + "step": 2009, + "task_loss": 0.013847017660737038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7558394058794179, + "compression_loss": 0.0, + "distillation_loss": 0.047759756445884705, + "epoch": 1.91, + "learning_rate": 4.1397856695902535e-05, + "loss": 0.085, + "step": 2010, + "task_loss": 0.4196968376636505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7558600545359353, + "compression_loss": 0.0, + "distillation_loss": 0.0479925200343132, + "epoch": 1.91, + "learning_rate": 4.138981225014733e-05, + "loss": 0.0445, + "step": 2011, + "task_loss": 0.012897208333015442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7558806967548358, + "compression_loss": 0.0, + "distillation_loss": 0.05814354866743088, + "epoch": 1.91, + "learning_rate": 4.1381764827103806e-05, + "loss": 0.0544, + "step": 2012, + "task_loss": 0.0203425120562315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.755901332537123, + "compression_loss": 0.0, + "distillation_loss": 0.09011336416006088, + "epoch": 1.91, + "learning_rate": 4.13737144282338e-05, + "loss": 0.0902, + "step": 2013, + "task_loss": 0.09071313589811325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7559219618838006, + "compression_loss": 0.0, + "distillation_loss": 0.055356744676828384, + "epoch": 1.91, + "learning_rate": 4.1365661054999715e-05, + "loss": 0.0554, + "step": 2014, + "task_loss": 0.05614471435546875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7559425847958723, + "compression_loss": 0.0, + "distillation_loss": 0.1986648440361023, + "epoch": 1.91, + "learning_rate": 4.1357604708864475e-05, + "loss": 0.2015, + "step": 2015, + "task_loss": 0.22709903120994568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7559632012743418, + "compression_loss": 0.0, + "distillation_loss": 0.1105402335524559, + "epoch": 1.91, + "learning_rate": 4.1349545391291563e-05, + "loss": 0.1051, + "step": 2016, + "task_loss": 0.05623643100261688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7559838113202129, + "compression_loss": 0.0, + "distillation_loss": 0.12236940860748291, + "epoch": 1.92, + "learning_rate": 4.1341483103745006e-05, + "loss": 0.1174, + "step": 2017, + "task_loss": 0.07233867049217224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7560044149344889, + "compression_loss": 0.0, + "distillation_loss": 0.17869043350219727, + "epoch": 1.92, + "learning_rate": 4.133341784768933e-05, + "loss": 0.1765, + "step": 2018, + "task_loss": 0.156291663646698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7560250121181739, + "compression_loss": 0.0, + "distillation_loss": 0.28349682688713074, + "epoch": 1.92, + "learning_rate": 4.132534962458962e-05, + "loss": 0.2765, + "step": 2019, + "task_loss": 0.21306130290031433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7560456028722713, + "compression_loss": 0.0, + "distillation_loss": 0.10640447586774826, + "epoch": 1.92, + "learning_rate": 4.131727843591155e-05, + "loss": 0.1149, + "step": 2020, + "task_loss": 0.1911725401878357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.756066187197785, + "compression_loss": 0.0, + "distillation_loss": 0.13865335285663605, + "epoch": 1.92, + "learning_rate": 4.130920428312127e-05, + "loss": 0.1432, + "step": 2021, + "task_loss": 0.18457633256912231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7560867650957186, + "compression_loss": 0.0, + "distillation_loss": 0.06674148142337799, + "epoch": 1.92, + "learning_rate": 4.130112716768548e-05, + "loss": 0.081, + "step": 2022, + "task_loss": 0.2092704176902771 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7561073365670756, + "compression_loss": 0.0, + "distillation_loss": 0.03668634220957756, + "epoch": 1.92, + "learning_rate": 4.129304709107143e-05, + "loss": 0.0345, + "step": 2023, + "task_loss": 0.014327209442853928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7561279016128599, + "compression_loss": 0.0, + "distillation_loss": 0.08399326354265213, + "epoch": 1.92, + "learning_rate": 4.128496405474691e-05, + "loss": 0.0867, + "step": 2024, + "task_loss": 0.11154383420944214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7561484602340752, + "compression_loss": 0.0, + "distillation_loss": 0.16036614775657654, + "epoch": 1.92, + "learning_rate": 4.127687806018024e-05, + "loss": 0.1671, + "step": 2025, + "task_loss": 0.22741574048995972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7561690124317251, + "compression_loss": 0.0, + "distillation_loss": 0.02608412131667137, + "epoch": 1.92, + "learning_rate": 4.1268789108840275e-05, + "loss": 0.0239, + "step": 2026, + "task_loss": 0.004486914724111557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7561895582068132, + "compression_loss": 0.0, + "distillation_loss": 0.04416799545288086, + "epoch": 1.92, + "learning_rate": 4.126069720219642e-05, + "loss": 0.0411, + "step": 2027, + "task_loss": 0.013907143846154213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7562100975603433, + "compression_loss": 0.0, + "distillation_loss": 0.24007698893547058, + "epoch": 1.93, + "learning_rate": 4.125260234171861e-05, + "loss": 0.2428, + "step": 2028, + "task_loss": 0.2669823169708252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7562306304933191, + "compression_loss": 0.0, + "distillation_loss": 0.19312305748462677, + "epoch": 1.93, + "learning_rate": 4.12445045288773e-05, + "loss": 0.1849, + "step": 2029, + "task_loss": 0.11117222160100937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7562511570067442, + "compression_loss": 0.0, + "distillation_loss": 0.3301912546157837, + "epoch": 1.93, + "learning_rate": 4.123640376514353e-05, + "loss": 0.316, + "step": 2030, + "task_loss": 0.18828773498535156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7562716771016224, + "compression_loss": 0.0, + "distillation_loss": 0.022117741405963898, + "epoch": 1.93, + "learning_rate": 4.12283000519888e-05, + "loss": 0.0202, + "step": 2031, + "task_loss": 0.002740517258644104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7562921907789572, + "compression_loss": 0.0, + "distillation_loss": 0.2477855086326599, + "epoch": 1.93, + "learning_rate": 4.122019339088522e-05, + "loss": 0.2442, + "step": 2032, + "task_loss": 0.2123817652463913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7563126980397523, + "compression_loss": 0.0, + "distillation_loss": 0.06262822449207306, + "epoch": 1.93, + "learning_rate": 4.121208378330539e-05, + "loss": 0.0674, + "step": 2033, + "task_loss": 0.11077867448329926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7563331988850116, + "compression_loss": 0.0, + "distillation_loss": 0.12019616365432739, + "epoch": 1.93, + "learning_rate": 4.120397123072246e-05, + "loss": 0.1144, + "step": 2034, + "task_loss": 0.06211775913834572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7563536933157385, + "compression_loss": 0.0, + "distillation_loss": 0.09517644345760345, + "epoch": 1.93, + "learning_rate": 4.119585573461012e-05, + "loss": 0.087, + "step": 2035, + "task_loss": 0.013532513752579689 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.756374181332937, + "compression_loss": 0.0, + "distillation_loss": 0.1831834316253662, + "epoch": 1.93, + "learning_rate": 4.118773729644258e-05, + "loss": 0.1754, + "step": 2036, + "task_loss": 0.10511328279972076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7563946629376105, + "compression_loss": 0.0, + "distillation_loss": 0.17582698166370392, + "epoch": 1.93, + "learning_rate": 4.11796159176946e-05, + "loss": 0.1714, + "step": 2037, + "task_loss": 0.13160312175750732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7564151381307629, + "compression_loss": 0.0, + "distillation_loss": 0.19995486736297607, + "epoch": 1.94, + "learning_rate": 4.117149159984147e-05, + "loss": 0.1948, + "step": 2038, + "task_loss": 0.14824716746807098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7564356069133976, + "compression_loss": 0.0, + "distillation_loss": 0.33650684356689453, + "epoch": 1.94, + "learning_rate": 4.116336434435901e-05, + "loss": 0.3217, + "step": 2039, + "task_loss": 0.1889341026544571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7564560692865185, + "compression_loss": 0.0, + "distillation_loss": 0.22618404030799866, + "epoch": 1.94, + "learning_rate": 4.115523415272358e-05, + "loss": 0.2265, + "step": 2040, + "task_loss": 0.2290044128894806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7564765252511294, + "compression_loss": 0.0, + "distillation_loss": 0.08555667102336884, + "epoch": 1.94, + "learning_rate": 4.1147101026412046e-05, + "loss": 0.0885, + "step": 2041, + "task_loss": 0.11483641713857651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7564969748082336, + "compression_loss": 0.0, + "distillation_loss": 0.17309504747390747, + "epoch": 1.94, + "learning_rate": 4.1138964966901853e-05, + "loss": 0.1651, + "step": 2042, + "task_loss": 0.09353820979595184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7565174179588352, + "compression_loss": 0.0, + "distillation_loss": 0.10289028286933899, + "epoch": 1.94, + "learning_rate": 4.113082597567095e-05, + "loss": 0.1063, + "step": 2043, + "task_loss": 0.1365654170513153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7565378547039375, + "compression_loss": 0.0, + "distillation_loss": 0.03402677923440933, + "epoch": 1.94, + "learning_rate": 4.112268405419782e-05, + "loss": 0.0313, + "step": 2044, + "task_loss": 0.006699586287140846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7565582850445445, + "compression_loss": 0.0, + "distillation_loss": 0.26117533445358276, + "epoch": 1.94, + "learning_rate": 4.1114539203961476e-05, + "loss": 0.2482, + "step": 2045, + "task_loss": 0.13159014284610748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7565787089816597, + "compression_loss": 0.0, + "distillation_loss": 0.1347775161266327, + "epoch": 1.94, + "learning_rate": 4.110639142644149e-05, + "loss": 0.1373, + "step": 2046, + "task_loss": 0.16003359854221344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7565991265162868, + "compression_loss": 0.0, + "distillation_loss": 0.2649126648902893, + "epoch": 1.94, + "learning_rate": 4.109824072311792e-05, + "loss": 0.2607, + "step": 2047, + "task_loss": 0.2226344645023346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7566195376494295, + "compression_loss": 0.0, + "distillation_loss": 0.21198835968971252, + "epoch": 1.94, + "learning_rate": 4.10900870954714e-05, + "loss": 0.2042, + "step": 2048, + "task_loss": 0.13424277305603027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7566399423820915, + "compression_loss": 0.0, + "distillation_loss": 0.16852621734142303, + "epoch": 1.95, + "learning_rate": 4.108193054498307e-05, + "loss": 0.164, + "step": 2049, + "task_loss": 0.1233605444431305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7566603407152765, + "compression_loss": 0.0, + "distillation_loss": 0.08903372287750244, + "epoch": 1.95, + "learning_rate": 4.10737710731346e-05, + "loss": 0.0815, + "step": 2050, + "task_loss": 0.013720404356718063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7566807326499883, + "compression_loss": 0.0, + "distillation_loss": 0.1362341046333313, + "epoch": 1.95, + "learning_rate": 4.106560868140821e-05, + "loss": 0.1378, + "step": 2051, + "task_loss": 0.15231622755527496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7567011181872302, + "compression_loss": 0.0, + "distillation_loss": 0.12862615287303925, + "epoch": 1.95, + "learning_rate": 4.105744337128662e-05, + "loss": 0.1334, + "step": 2052, + "task_loss": 0.17673933506011963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7567214973280062, + "compression_loss": 0.0, + "distillation_loss": 0.12414275854825974, + "epoch": 1.95, + "learning_rate": 4.104927514425312e-05, + "loss": 0.1193, + "step": 2053, + "task_loss": 0.07598595321178436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7567418700733199, + "compression_loss": 0.0, + "distillation_loss": 0.08933214098215103, + "epoch": 1.95, + "learning_rate": 4.104110400179148e-05, + "loss": 0.0928, + "step": 2054, + "task_loss": 0.12388118356466293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7567622364241751, + "compression_loss": 0.0, + "distillation_loss": 0.14208053052425385, + "epoch": 1.95, + "learning_rate": 4.103292994538605e-05, + "loss": 0.1349, + "step": 2055, + "task_loss": 0.07043315470218658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7567825963815752, + "compression_loss": 0.0, + "distillation_loss": 0.26003506779670715, + "epoch": 1.95, + "learning_rate": 4.102475297652168e-05, + "loss": 0.2527, + "step": 2056, + "task_loss": 0.18638324737548828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7568029499465241, + "compression_loss": 0.0, + "distillation_loss": 0.13777679204940796, + "epoch": 1.95, + "learning_rate": 4.1016573096683765e-05, + "loss": 0.1306, + "step": 2057, + "task_loss": 0.06621996313333511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7568232971200255, + "compression_loss": 0.0, + "distillation_loss": 0.05839243531227112, + "epoch": 1.95, + "learning_rate": 4.10083903073582e-05, + "loss": 0.053, + "step": 2058, + "task_loss": 0.004214171320199966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.756843637903083, + "compression_loss": 0.0, + "distillation_loss": 0.3341011106967926, + "epoch": 1.96, + "learning_rate": 4.1000204610031447e-05, + "loss": 0.3197, + "step": 2059, + "task_loss": 0.18979643285274506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7568639722967002, + "compression_loss": 0.0, + "distillation_loss": 0.06705968081951141, + "epoch": 1.96, + "learning_rate": 4.0992016006190456e-05, + "loss": 0.0707, + "step": 2060, + "task_loss": 0.10322168469429016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7568843003018809, + "compression_loss": 0.0, + "distillation_loss": 0.047776952385902405, + "epoch": 1.96, + "learning_rate": 4.0983824497322755e-05, + "loss": 0.0474, + "step": 2061, + "task_loss": 0.04416225105524063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7569046219196288, + "compression_loss": 0.0, + "distillation_loss": 0.10682905465364456, + "epoch": 1.96, + "learning_rate": 4.0975630084916344e-05, + "loss": 0.1004, + "step": 2062, + "task_loss": 0.042584970593452454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7569249371509476, + "compression_loss": 0.0, + "distillation_loss": 0.14277291297912598, + "epoch": 1.96, + "learning_rate": 4.096743277045979e-05, + "loss": 0.1367, + "step": 2063, + "task_loss": 0.08212710171937943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7569452459968409, + "compression_loss": 0.0, + "distillation_loss": 0.20160989463329315, + "epoch": 1.96, + "learning_rate": 4.0959232555442174e-05, + "loss": 0.194, + "step": 2064, + "task_loss": 0.12543994188308716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7569655484583123, + "compression_loss": 0.0, + "distillation_loss": 0.12779472768306732, + "epoch": 1.96, + "learning_rate": 4.0951029441353104e-05, + "loss": 0.1189, + "step": 2065, + "task_loss": 0.03901362046599388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7569858445363656, + "compression_loss": 0.0, + "distillation_loss": 0.19314239919185638, + "epoch": 1.96, + "learning_rate": 4.094282342968271e-05, + "loss": 0.1827, + "step": 2066, + "task_loss": 0.08886859565973282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7570061342320046, + "compression_loss": 0.0, + "distillation_loss": 0.13019533455371857, + "epoch": 1.96, + "learning_rate": 4.093461452192167e-05, + "loss": 0.1232, + "step": 2067, + "task_loss": 0.05998740345239639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7570264175462328, + "compression_loss": 0.0, + "distillation_loss": 0.21118499338626862, + "epoch": 1.96, + "learning_rate": 4.092640271956115e-05, + "loss": 0.2132, + "step": 2068, + "task_loss": 0.23096507787704468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7570466944800539, + "compression_loss": 0.0, + "distillation_loss": 0.09960189461708069, + "epoch": 1.96, + "learning_rate": 4.091818802409288e-05, + "loss": 0.0991, + "step": 2069, + "task_loss": 0.094699427485466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7570669650344717, + "compression_loss": 0.0, + "distillation_loss": 0.09009353071451187, + "epoch": 1.97, + "learning_rate": 4.0909970437009096e-05, + "loss": 0.0879, + "step": 2070, + "task_loss": 0.06863107532262802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7570872292104897, + "compression_loss": 0.0, + "distillation_loss": 0.05240606516599655, + "epoch": 1.97, + "learning_rate": 4.0901749959802546e-05, + "loss": 0.0545, + "step": 2071, + "task_loss": 0.07330939918756485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7571074870091117, + "compression_loss": 0.0, + "distillation_loss": 0.32454681396484375, + "epoch": 1.97, + "learning_rate": 4.0893526593966535e-05, + "loss": 0.3176, + "step": 2072, + "task_loss": 0.25487715005874634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7571277384313414, + "compression_loss": 0.0, + "distillation_loss": 0.1994623839855194, + "epoch": 1.97, + "learning_rate": 4.088530034099488e-05, + "loss": 0.1925, + "step": 2073, + "task_loss": 0.13009962439537048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7571479834781824, + "compression_loss": 0.0, + "distillation_loss": 0.043959006667137146, + "epoch": 1.97, + "learning_rate": 4.087707120238191e-05, + "loss": 0.0632, + "step": 2074, + "task_loss": 0.2364317774772644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7571682221506384, + "compression_loss": 0.0, + "distillation_loss": 0.1748720109462738, + "epoch": 1.97, + "learning_rate": 4.0868839179622495e-05, + "loss": 0.1739, + "step": 2075, + "task_loss": 0.16508643329143524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7571884544497132, + "compression_loss": 0.0, + "distillation_loss": 0.09879680722951889, + "epoch": 1.97, + "learning_rate": 4.086060427421202e-05, + "loss": 0.1112, + "step": 2076, + "task_loss": 0.2223973572254181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7572086803764103, + "compression_loss": 0.0, + "distillation_loss": 0.13838914036750793, + "epoch": 1.97, + "learning_rate": 4.0852366487646384e-05, + "loss": 0.1375, + "step": 2077, + "task_loss": 0.1297679990530014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7572288999317336, + "compression_loss": 0.0, + "distillation_loss": 0.17635372281074524, + "epoch": 1.97, + "learning_rate": 4.084412582142204e-05, + "loss": 0.1725, + "step": 2078, + "task_loss": 0.1377314180135727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7572491131166865, + "compression_loss": 0.0, + "distillation_loss": 0.09490906447172165, + "epoch": 1.97, + "learning_rate": 4.083588227703593e-05, + "loss": 0.1, + "step": 2079, + "task_loss": 0.1462094932794571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.757269319932273, + "compression_loss": 0.0, + "distillation_loss": 0.05555427446961403, + "epoch": 1.98, + "learning_rate": 4.0827635855985534e-05, + "loss": 0.0618, + "step": 2080, + "task_loss": 0.11752540618181229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7572895203794965, + "compression_loss": 0.0, + "distillation_loss": 0.08329576253890991, + "epoch": 1.98, + "learning_rate": 4.081938655976886e-05, + "loss": 0.0857, + "step": 2081, + "task_loss": 0.10723456740379333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7573097144593608, + "compression_loss": 0.0, + "distillation_loss": 0.1329977810382843, + "epoch": 1.98, + "learning_rate": 4.0811134389884433e-05, + "loss": 0.1404, + "step": 2082, + "task_loss": 0.2066575586795807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7573299021728697, + "compression_loss": 0.0, + "distillation_loss": 0.03028569370508194, + "epoch": 1.98, + "learning_rate": 4.08028793478313e-05, + "loss": 0.0541, + "step": 2083, + "task_loss": 0.2682817280292511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7573500835210266, + "compression_loss": 0.0, + "distillation_loss": 0.2019033133983612, + "epoch": 1.98, + "learning_rate": 4.0794621435109015e-05, + "loss": 0.2013, + "step": 2084, + "task_loss": 0.19626502692699432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7573702585048355, + "compression_loss": 0.0, + "distillation_loss": 0.04804403334856033, + "epoch": 1.98, + "learning_rate": 4.0786360653217684e-05, + "loss": 0.049, + "step": 2085, + "task_loss": 0.05735541135072708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7573904271252998, + "compression_loss": 0.0, + "distillation_loss": 0.14371657371520996, + "epoch": 1.98, + "learning_rate": 4.0778097003657915e-05, + "loss": 0.1367, + "step": 2086, + "task_loss": 0.07343168556690216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7574105893834234, + "compression_loss": 0.0, + "distillation_loss": 0.172747403383255, + "epoch": 1.98, + "learning_rate": 4.0769830487930835e-05, + "loss": 0.1739, + "step": 2087, + "task_loss": 0.18447381258010864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7574307452802099, + "compression_loss": 0.0, + "distillation_loss": 0.18645472824573517, + "epoch": 1.98, + "learning_rate": 4.07615611075381e-05, + "loss": 0.1843, + "step": 2088, + "task_loss": 0.16447779536247253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.757450894816663, + "compression_loss": 0.0, + "distillation_loss": 0.09333232790231705, + "epoch": 1.98, + "learning_rate": 4.075328886398188e-05, + "loss": 0.102, + "step": 2089, + "task_loss": 0.17981281876564026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7574710379937862, + "compression_loss": 0.0, + "distillation_loss": 0.1386614441871643, + "epoch": 1.98, + "learning_rate": 4.074501375876487e-05, + "loss": 0.1315, + "step": 2090, + "task_loss": 0.0667913407087326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7574911748125835, + "compression_loss": 0.0, + "distillation_loss": 0.1604781299829483, + "epoch": 1.99, + "learning_rate": 4.073673579339028e-05, + "loss": 0.155, + "step": 2091, + "task_loss": 0.10606161504983902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7575113052740584, + "compression_loss": 0.0, + "distillation_loss": 0.2468898594379425, + "epoch": 1.99, + "learning_rate": 4.0728454969361854e-05, + "loss": 0.25, + "step": 2092, + "task_loss": 0.2778300940990448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7575314293792146, + "compression_loss": 0.0, + "distillation_loss": 0.20150130987167358, + "epoch": 1.99, + "learning_rate": 4.0720171288183815e-05, + "loss": 0.1922, + "step": 2093, + "task_loss": 0.10800500959157944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7575515471290557, + "compression_loss": 0.0, + "distillation_loss": 0.1500411480665207, + "epoch": 1.99, + "learning_rate": 4.0711884751360964e-05, + "loss": 0.1402, + "step": 2094, + "task_loss": 0.051413945853710175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7575716585245855, + "compression_loss": 0.0, + "distillation_loss": 0.052510011941194534, + "epoch": 1.99, + "learning_rate": 4.070359536039858e-05, + "loss": 0.066, + "step": 2095, + "task_loss": 0.18779529631137848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7575917635668078, + "compression_loss": 0.0, + "distillation_loss": 0.2838015854358673, + "epoch": 1.99, + "learning_rate": 4.069530311680247e-05, + "loss": 0.2762, + "step": 2096, + "task_loss": 0.20768359303474426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7576118622567259, + "compression_loss": 0.0, + "distillation_loss": 0.24412603676319122, + "epoch": 1.99, + "learning_rate": 4.068700802207895e-05, + "loss": 0.2467, + "step": 2097, + "task_loss": 0.2701077461242676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7576319545953439, + "compression_loss": 0.0, + "distillation_loss": 0.18422961235046387, + "epoch": 1.99, + "learning_rate": 4.0678710077734885e-05, + "loss": 0.1914, + "step": 2098, + "task_loss": 0.2559583783149719 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7576520405836652, + "compression_loss": 0.0, + "distillation_loss": 0.10283166915178299, + "epoch": 1.99, + "learning_rate": 4.0670409285277614e-05, + "loss": 0.1053, + "step": 2099, + "task_loss": 0.12738347053527832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7576721202226936, + "compression_loss": 0.0, + "distillation_loss": 0.24559719860553741, + "epoch": 1.99, + "learning_rate": 4.0662105646215034e-05, + "loss": 0.2482, + "step": 2100, + "task_loss": 0.271691232919693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7576921935134328, + "compression_loss": 0.0, + "distillation_loss": 0.15227892994880676, + "epoch": 2.0, + "learning_rate": 4.065379916205554e-05, + "loss": 0.1468, + "step": 2101, + "task_loss": 0.09752043336629868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7577122604568864, + "compression_loss": 0.0, + "distillation_loss": 0.0908876582980156, + "epoch": 2.0, + "learning_rate": 4.0645489834308024e-05, + "loss": 0.0848, + "step": 2102, + "task_loss": 0.02978145144879818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7577323210540582, + "compression_loss": 0.0, + "distillation_loss": 0.3263307809829712, + "epoch": 2.0, + "learning_rate": 4.063717766448194e-05, + "loss": 0.3075, + "step": 2103, + "task_loss": 0.1381606012582779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7577523753059517, + "compression_loss": 0.0, + "distillation_loss": 0.15767845511436462, + "epoch": 2.0, + "learning_rate": 4.062886265408722e-05, + "loss": 0.1544, + "step": 2104, + "task_loss": 0.12524300813674927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.7577724232135707, + "compression_loss": 0.0, + "distillation_loss": 0.15870457887649536, + "epoch": 2.0, + "learning_rate": 4.062054480463433e-05, + "loss": 0.1576, + "step": 2105, + "task_loss": 0.14793536067008972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, + "compression/magnitude_sparsity/target_sparsity_level": 0.757792464777919, + "compression_loss": 0.0, + "distillation_loss": 0.016080807894468307, + "epoch": 2.0, + "learning_rate": 4.0612224117634245e-05, + "loss": 0.0148, + "step": 2106, + "task_loss": 0.003325019497424364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7578125, + "compression_loss": 0.0, + "distillation_loss": 0.8665308952331543, + "epoch": 2.0, + "learning_rate": 4.060390059459846e-05, + "loss": 0.8038, + "step": 2107, + "task_loss": 0.23952460289001465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7578325288808176, + "compression_loss": 0.0, + "distillation_loss": 0.775876522064209, + "epoch": 2.0, + "learning_rate": 4.059557423703899e-05, + "loss": 0.7176, + "step": 2108, + "task_loss": 0.1935485601425171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7578525514213754, + "compression_loss": 0.0, + "distillation_loss": 0.8252198696136475, + "epoch": 2.0, + "learning_rate": 4.058724504646834e-05, + "loss": 0.7657, + "step": 2109, + "task_loss": 0.230499267578125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7578725676226772, + "compression_loss": 0.0, + "distillation_loss": 0.6135385036468506, + "epoch": 2.0, + "learning_rate": 4.0578913024399564e-05, + "loss": 0.5737, + "step": 2110, + "task_loss": 0.21483993530273438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7578925774857265, + "compression_loss": 0.0, + "distillation_loss": 0.5455840826034546, + "epoch": 2.0, + "learning_rate": 4.057057817234621e-05, + "loss": 0.5087, + "step": 2111, + "task_loss": 0.17693614959716797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.757912581011527, + "compression_loss": 0.0, + "distillation_loss": 0.43998754024505615, + "epoch": 2.01, + "learning_rate": 4.0562240491822334e-05, + "loss": 0.4114, + "step": 2112, + "task_loss": 0.154291033744812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7579325782010825, + "compression_loss": 0.0, + "distillation_loss": 0.3841041922569275, + "epoch": 2.01, + "learning_rate": 4.055389998434253e-05, + "loss": 0.3626, + "step": 2113, + "task_loss": 0.16882005333900452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7579525690553967, + "compression_loss": 0.0, + "distillation_loss": 0.4492219090461731, + "epoch": 2.01, + "learning_rate": 4.054555665142189e-05, + "loss": 0.4199, + "step": 2114, + "task_loss": 0.15565484762191772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7579725535754731, + "compression_loss": 0.0, + "distillation_loss": 0.3286392390727997, + "epoch": 2.01, + "learning_rate": 4.053721049457601e-05, + "loss": 0.3141, + "step": 2115, + "task_loss": 0.1833527535200119 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7579925317623155, + "compression_loss": 0.0, + "distillation_loss": 0.2822880148887634, + "epoch": 2.01, + "learning_rate": 4.052886151532101e-05, + "loss": 0.2741, + "step": 2116, + "task_loss": 0.2002839893102646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7580125036169276, + "compression_loss": 0.0, + "distillation_loss": 0.30642810463905334, + "epoch": 2.01, + "learning_rate": 4.0520509715173544e-05, + "loss": 0.2994, + "step": 2117, + "task_loss": 0.235684335231781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7580324691403132, + "compression_loss": 0.0, + "distillation_loss": 0.20797675848007202, + "epoch": 2.01, + "learning_rate": 4.051215509565073e-05, + "loss": 0.2032, + "step": 2118, + "task_loss": 0.16037693619728088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7580524283334756, + "compression_loss": 0.0, + "distillation_loss": 0.12344904243946075, + "epoch": 2.01, + "learning_rate": 4.050379765827024e-05, + "loss": 0.1167, + "step": 2119, + "task_loss": 0.056173477321863174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7580723811974189, + "compression_loss": 0.0, + "distillation_loss": 0.1896042823791504, + "epoch": 2.01, + "learning_rate": 4.0495437404550233e-05, + "loss": 0.179, + "step": 2120, + "task_loss": 0.08344145119190216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7580923277331465, + "compression_loss": 0.0, + "distillation_loss": 0.16051128506660461, + "epoch": 2.01, + "learning_rate": 4.04870743360094e-05, + "loss": 0.1519, + "step": 2121, + "task_loss": 0.07446075230836868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7581122679416623, + "compression_loss": 0.0, + "distillation_loss": 0.37767481803894043, + "epoch": 2.02, + "learning_rate": 4.047870845416693e-05, + "loss": 0.3772, + "step": 2122, + "task_loss": 0.3733970522880554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7581322018239698, + "compression_loss": 0.0, + "distillation_loss": 0.21346813440322876, + "epoch": 2.02, + "learning_rate": 4.0470339760542506e-05, + "loss": 0.2017, + "step": 2123, + "task_loss": 0.09529842436313629 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7581521293810727, + "compression_loss": 0.0, + "distillation_loss": 0.6465510129928589, + "epoch": 2.02, + "learning_rate": 4.0461968256656376e-05, + "loss": 0.6242, + "step": 2124, + "task_loss": 0.42273980379104614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7581720506139747, + "compression_loss": 0.0, + "distillation_loss": 0.4466746747493744, + "epoch": 2.02, + "learning_rate": 4.045359394402925e-05, + "loss": 0.4386, + "step": 2125, + "task_loss": 0.366089791059494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7581919655236797, + "compression_loss": 0.0, + "distillation_loss": 0.05343036353588104, + "epoch": 2.02, + "learning_rate": 4.0445216824182344e-05, + "loss": 0.0488, + "step": 2126, + "task_loss": 0.006774421781301498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7582118741111911, + "compression_loss": 0.0, + "distillation_loss": 0.042159806936979294, + "epoch": 2.02, + "learning_rate": 4.043683689863742e-05, + "loss": 0.0449, + "step": 2127, + "task_loss": 0.06956613063812256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7582317763775126, + "compression_loss": 0.0, + "distillation_loss": 0.3928431272506714, + "epoch": 2.02, + "learning_rate": 4.042845416891673e-05, + "loss": 0.3936, + "step": 2128, + "task_loss": 0.3999135494232178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7582516723236481, + "compression_loss": 0.0, + "distillation_loss": 0.22617900371551514, + "epoch": 2.02, + "learning_rate": 4.042006863654303e-05, + "loss": 0.245, + "step": 2129, + "task_loss": 0.4148496389389038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.758271561950601, + "compression_loss": 0.0, + "distillation_loss": 0.21618734300136566, + "epoch": 2.02, + "learning_rate": 4.041168030303961e-05, + "loss": 0.207, + "step": 2130, + "task_loss": 0.12459345906972885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7582914452593752, + "compression_loss": 0.0, + "distillation_loss": 0.398822546005249, + "epoch": 2.02, + "learning_rate": 4.0403289169930235e-05, + "loss": 0.3847, + "step": 2131, + "task_loss": 0.2576183080673218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7583113222509743, + "compression_loss": 0.0, + "distillation_loss": 0.16397595405578613, + "epoch": 2.02, + "learning_rate": 4.03948952387392e-05, + "loss": 0.1636, + "step": 2132, + "task_loss": 0.15986183285713196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.758331192926402, + "compression_loss": 0.0, + "distillation_loss": 0.19217489659786224, + "epoch": 2.03, + "learning_rate": 4.03864985109913e-05, + "loss": 0.1823, + "step": 2133, + "task_loss": 0.09319883584976196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.758351057286662, + "compression_loss": 0.0, + "distillation_loss": 0.16633236408233643, + "epoch": 2.03, + "learning_rate": 4.0378098988211845e-05, + "loss": 0.1563, + "step": 2134, + "task_loss": 0.06592224538326263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7583709153327578, + "compression_loss": 0.0, + "distillation_loss": 0.1656203418970108, + "epoch": 2.03, + "learning_rate": 4.036969667192665e-05, + "loss": 0.1591, + "step": 2135, + "task_loss": 0.10005275160074234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7583907670656934, + "compression_loss": 0.0, + "distillation_loss": 0.1509263813495636, + "epoch": 2.03, + "learning_rate": 4.036129156366203e-05, + "loss": 0.1424, + "step": 2136, + "task_loss": 0.0660940557718277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7584106124864722, + "compression_loss": 0.0, + "distillation_loss": 0.1414584517478943, + "epoch": 2.03, + "learning_rate": 4.0352883664944816e-05, + "loss": 0.137, + "step": 2137, + "task_loss": 0.09674646705389023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7584304515960981, + "compression_loss": 0.0, + "distillation_loss": 0.10992178320884705, + "epoch": 2.03, + "learning_rate": 4.034447297730234e-05, + "loss": 0.1093, + "step": 2138, + "task_loss": 0.10394454747438431 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7584502843955747, + "compression_loss": 0.0, + "distillation_loss": 0.2345104068517685, + "epoch": 2.03, + "learning_rate": 4.033605950226246e-05, + "loss": 0.2226, + "step": 2139, + "task_loss": 0.115862175822258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7584701108859055, + "compression_loss": 0.0, + "distillation_loss": 0.15543803572654724, + "epoch": 2.03, + "learning_rate": 4.03276432413535e-05, + "loss": 0.1514, + "step": 2140, + "task_loss": 0.11481384187936783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7584899310680945, + "compression_loss": 0.0, + "distillation_loss": 0.12512551248073578, + "epoch": 2.03, + "learning_rate": 4.0319224196104334e-05, + "loss": 0.1191, + "step": 2141, + "task_loss": 0.06533493101596832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7585097449431452, + "compression_loss": 0.0, + "distillation_loss": 0.09096786379814148, + "epoch": 2.03, + "learning_rate": 4.031080236804431e-05, + "loss": 0.0902, + "step": 2142, + "task_loss": 0.08328302204608917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7585295525120613, + "compression_loss": 0.0, + "distillation_loss": 0.10509166121482849, + "epoch": 2.04, + "learning_rate": 4.03023777587033e-05, + "loss": 0.0993, + "step": 2143, + "task_loss": 0.04766244813799858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7585493537758464, + "compression_loss": 0.0, + "distillation_loss": 0.1337359994649887, + "epoch": 2.04, + "learning_rate": 4.029395036961168e-05, + "loss": 0.1341, + "step": 2144, + "task_loss": 0.13710615038871765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7585691487355044, + "compression_loss": 0.0, + "distillation_loss": 0.10559399425983429, + "epoch": 2.04, + "learning_rate": 4.028552020230031e-05, + "loss": 0.0992, + "step": 2145, + "task_loss": 0.042120400816202164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7585889373920388, + "compression_loss": 0.0, + "distillation_loss": 0.04972688853740692, + "epoch": 2.04, + "learning_rate": 4.0277087258300575e-05, + "loss": 0.0459, + "step": 2146, + "task_loss": 0.01133745163679123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7586087197464534, + "compression_loss": 0.0, + "distillation_loss": 0.16767317056655884, + "epoch": 2.04, + "learning_rate": 4.0268651539144374e-05, + "loss": 0.1856, + "step": 2147, + "task_loss": 0.3467387855052948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7586284957997518, + "compression_loss": 0.0, + "distillation_loss": 0.17021751403808594, + "epoch": 2.04, + "learning_rate": 4.026021304636408e-05, + "loss": 0.1609, + "step": 2148, + "task_loss": 0.07744796574115753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7586482655529376, + "compression_loss": 0.0, + "distillation_loss": 0.2166343480348587, + "epoch": 2.04, + "learning_rate": 4.0251771781492594e-05, + "loss": 0.2155, + "step": 2149, + "task_loss": 0.20558899641036987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7586680290070147, + "compression_loss": 0.0, + "distillation_loss": 0.23501603305339813, + "epoch": 2.04, + "learning_rate": 4.0243327746063315e-05, + "loss": 0.2259, + "step": 2150, + "task_loss": 0.14403802156448364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7586877861629866, + "compression_loss": 0.0, + "distillation_loss": 0.21590901911258698, + "epoch": 2.04, + "learning_rate": 4.0234880941610134e-05, + "loss": 0.2069, + "step": 2151, + "task_loss": 0.1256270408630371 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.758707537021857, + "compression_loss": 0.0, + "distillation_loss": 0.22788038849830627, + "epoch": 2.04, + "learning_rate": 4.022643136966746e-05, + "loss": 0.2208, + "step": 2152, + "task_loss": 0.1574449986219406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7587272815846298, + "compression_loss": 0.0, + "distillation_loss": 0.34770405292510986, + "epoch": 2.04, + "learning_rate": 4.021797903177019e-05, + "loss": 0.336, + "step": 2153, + "task_loss": 0.23103071749210358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7587470198523083, + "compression_loss": 0.0, + "distillation_loss": 0.13833436369895935, + "epoch": 2.05, + "learning_rate": 4.0209523929453744e-05, + "loss": 0.1318, + "step": 2154, + "task_loss": 0.0730195865035057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7587667518258966, + "compression_loss": 0.0, + "distillation_loss": 0.1359223574399948, + "epoch": 2.05, + "learning_rate": 4.0201066064254026e-05, + "loss": 0.1326, + "step": 2155, + "task_loss": 0.10307220369577408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.758786477506398, + "compression_loss": 0.0, + "distillation_loss": 0.19817477464675903, + "epoch": 2.05, + "learning_rate": 4.019260543770745e-05, + "loss": 0.1921, + "step": 2156, + "task_loss": 0.13708311319351196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7588061968948164, + "compression_loss": 0.0, + "distillation_loss": 0.1101415753364563, + "epoch": 2.05, + "learning_rate": 4.018414205135093e-05, + "loss": 0.1024, + "step": 2157, + "task_loss": 0.03281085193157196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7588259099921556, + "compression_loss": 0.0, + "distillation_loss": 0.2274709939956665, + "epoch": 2.05, + "learning_rate": 4.017567590672187e-05, + "loss": 0.2163, + "step": 2158, + "task_loss": 0.11540090292692184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.758845616799419, + "compression_loss": 0.0, + "distillation_loss": 0.26632124185562134, + "epoch": 2.05, + "learning_rate": 4.01672070053582e-05, + "loss": 0.2622, + "step": 2159, + "task_loss": 0.2254357933998108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7588653173176103, + "compression_loss": 0.0, + "distillation_loss": 0.32481926679611206, + "epoch": 2.05, + "learning_rate": 4.015873534879833e-05, + "loss": 0.3212, + "step": 2160, + "task_loss": 0.288277268409729 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7588850115477335, + "compression_loss": 0.0, + "distillation_loss": 0.2757709324359894, + "epoch": 2.05, + "learning_rate": 4.015026093858119e-05, + "loss": 0.2797, + "step": 2161, + "task_loss": 0.3151787519454956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7589046994907919, + "compression_loss": 0.0, + "distillation_loss": 0.07919088006019592, + "epoch": 2.05, + "learning_rate": 4.014178377624617e-05, + "loss": 0.077, + "step": 2162, + "task_loss": 0.05737023800611496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7589243811477894, + "compression_loss": 0.0, + "distillation_loss": 0.13900256156921387, + "epoch": 2.05, + "learning_rate": 4.013330386333321e-05, + "loss": 0.138, + "step": 2163, + "task_loss": 0.12849166989326477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7589440565197296, + "compression_loss": 0.0, + "distillation_loss": 0.14013248682022095, + "epoch": 2.06, + "learning_rate": 4.012482120138272e-05, + "loss": 0.1338, + "step": 2164, + "task_loss": 0.07710616290569305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7589637256076164, + "compression_loss": 0.0, + "distillation_loss": 0.05423755198717117, + "epoch": 2.06, + "learning_rate": 4.011633579193561e-05, + "loss": 0.0512, + "step": 2165, + "task_loss": 0.024237103760242462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7589833884124532, + "compression_loss": 0.0, + "distillation_loss": 0.20200000703334808, + "epoch": 2.06, + "learning_rate": 4.010784763653331e-05, + "loss": 0.2011, + "step": 2166, + "task_loss": 0.19308245182037354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7590030449352436, + "compression_loss": 0.0, + "distillation_loss": 0.20488443970680237, + "epoch": 2.06, + "learning_rate": 4.0099356736717725e-05, + "loss": 0.2045, + "step": 2167, + "task_loss": 0.20138651132583618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7590226951769916, + "compression_loss": 0.0, + "distillation_loss": 0.10349798947572708, + "epoch": 2.06, + "learning_rate": 4.0090863094031274e-05, + "loss": 0.1, + "step": 2168, + "task_loss": 0.06868550926446915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7590423391387008, + "compression_loss": 0.0, + "distillation_loss": 0.09170787781476974, + "epoch": 2.06, + "learning_rate": 4.008236671001686e-05, + "loss": 0.0931, + "step": 2169, + "task_loss": 0.10580511391162872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7590619768213748, + "compression_loss": 0.0, + "distillation_loss": 0.15538711845874786, + "epoch": 2.06, + "learning_rate": 4.0073867586217895e-05, + "loss": 0.1487, + "step": 2170, + "task_loss": 0.08846833556890488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7590816082260172, + "compression_loss": 0.0, + "distillation_loss": 0.1697424054145813, + "epoch": 2.06, + "learning_rate": 4.006536572417828e-05, + "loss": 0.1626, + "step": 2171, + "task_loss": 0.09881134331226349 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7591012333536319, + "compression_loss": 0.0, + "distillation_loss": 0.13482381403446198, + "epoch": 2.06, + "learning_rate": 4.0056861125442435e-05, + "loss": 0.1331, + "step": 2172, + "task_loss": 0.11754244565963745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7591208522052225, + "compression_loss": 0.0, + "distillation_loss": 0.29298990964889526, + "epoch": 2.06, + "learning_rate": 4.004835379155525e-05, + "loss": 0.2928, + "step": 2173, + "task_loss": 0.291414350271225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7591404647817925, + "compression_loss": 0.0, + "distillation_loss": 0.09788675606250763, + "epoch": 2.06, + "learning_rate": 4.003984372406212e-05, + "loss": 0.0913, + "step": 2174, + "task_loss": 0.03239801526069641 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7591600710843459, + "compression_loss": 0.0, + "distillation_loss": 0.05986270681023598, + "epoch": 2.07, + "learning_rate": 4.003133092450895e-05, + "loss": 0.0661, + "step": 2175, + "task_loss": 0.1226883977651596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7591796711138862, + "compression_loss": 0.0, + "distillation_loss": 0.038474880158901215, + "epoch": 2.07, + "learning_rate": 4.002281539444213e-05, + "loss": 0.0356, + "step": 2176, + "task_loss": 0.00936584360897541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.759199264871417, + "compression_loss": 0.0, + "distillation_loss": 0.11121018975973129, + "epoch": 2.07, + "learning_rate": 4.001429713540853e-05, + "loss": 0.1127, + "step": 2177, + "task_loss": 0.12647242844104767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7592188523579422, + "compression_loss": 0.0, + "distillation_loss": 0.11997029185295105, + "epoch": 2.07, + "learning_rate": 4.000577614895555e-05, + "loss": 0.1197, + "step": 2178, + "task_loss": 0.11679819226264954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7592384335744653, + "compression_loss": 0.0, + "distillation_loss": 0.2858255207538605, + "epoch": 2.07, + "learning_rate": 3.999725243663107e-05, + "loss": 0.2816, + "step": 2179, + "task_loss": 0.24326691031455994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.75925800852199, + "compression_loss": 0.0, + "distillation_loss": 0.09184737503528595, + "epoch": 2.07, + "learning_rate": 3.9988725999983456e-05, + "loss": 0.0963, + "step": 2180, + "task_loss": 0.13659027218818665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7592775772015201, + "compression_loss": 0.0, + "distillation_loss": 0.11890025436878204, + "epoch": 2.07, + "learning_rate": 3.998019684056158e-05, + "loss": 0.121, + "step": 2181, + "task_loss": 0.13989683985710144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7592971396140592, + "compression_loss": 0.0, + "distillation_loss": 0.189500093460083, + "epoch": 2.07, + "learning_rate": 3.99716649599148e-05, + "loss": 0.1928, + "step": 2182, + "task_loss": 0.22282639145851135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.759316695760611, + "compression_loss": 0.0, + "distillation_loss": 0.23618614673614502, + "epoch": 2.07, + "learning_rate": 3.996313035959297e-05, + "loss": 0.2329, + "step": 2183, + "task_loss": 0.20297005772590637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7593362456421792, + "compression_loss": 0.0, + "distillation_loss": 0.09011323750019073, + "epoch": 2.07, + "learning_rate": 3.995459304114645e-05, + "loss": 0.0857, + "step": 2184, + "task_loss": 0.04603835195302963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7593557892597673, + "compression_loss": 0.0, + "distillation_loss": 0.14607930183410645, + "epoch": 2.08, + "learning_rate": 3.9946053006126086e-05, + "loss": 0.1419, + "step": 2185, + "task_loss": 0.1042981743812561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7593753266143792, + "compression_loss": 0.0, + "distillation_loss": 0.12250328063964844, + "epoch": 2.08, + "learning_rate": 3.993751025608321e-05, + "loss": 0.1182, + "step": 2186, + "task_loss": 0.07953470945358276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7593948577070186, + "compression_loss": 0.0, + "distillation_loss": 0.1935318261384964, + "epoch": 2.08, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.1838, + "step": 2187, + "task_loss": 0.09669722616672516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7594143825386891, + "compression_loss": 0.0, + "distillation_loss": 0.07566056400537491, + "epoch": 2.08, + "learning_rate": 3.9920416617137745e-05, + "loss": 0.0751, + "step": 2188, + "task_loss": 0.07045575231313705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7594339011103943, + "compression_loss": 0.0, + "distillation_loss": 0.05975474417209625, + "epoch": 2.08, + "learning_rate": 3.9911865731340306e-05, + "loss": 0.0558, + "step": 2189, + "task_loss": 0.020496994256973267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7594534134231381, + "compression_loss": 0.0, + "distillation_loss": 0.1242765486240387, + "epoch": 2.08, + "learning_rate": 3.9903312136730634e-05, + "loss": 0.1215, + "step": 2190, + "task_loss": 0.09669384360313416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7594729194779241, + "compression_loss": 0.0, + "distillation_loss": 0.15067118406295776, + "epoch": 2.08, + "learning_rate": 3.989475583486254e-05, + "loss": 0.1422, + "step": 2191, + "task_loss": 0.06594227254390717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7594924192757557, + "compression_loss": 0.0, + "distillation_loss": 0.15141010284423828, + "epoch": 2.08, + "learning_rate": 3.988619682729032e-05, + "loss": 0.1463, + "step": 2192, + "task_loss": 0.10027869790792465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.759511912817637, + "compression_loss": 0.0, + "distillation_loss": 0.08504438400268555, + "epoch": 2.08, + "learning_rate": 3.987763511556874e-05, + "loss": 0.0815, + "step": 2193, + "task_loss": 0.0492318794131279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7595314001045714, + "compression_loss": 0.0, + "distillation_loss": 0.03857031837105751, + "epoch": 2.08, + "learning_rate": 3.98690707012531e-05, + "loss": 0.0354, + "step": 2194, + "task_loss": 0.006879139691591263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7595508811375626, + "compression_loss": 0.0, + "distillation_loss": 0.12435384094715118, + "epoch": 2.08, + "learning_rate": 3.986050358589916e-05, + "loss": 0.1221, + "step": 2195, + "task_loss": 0.10201095044612885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7595703559176146, + "compression_loss": 0.0, + "distillation_loss": 0.13030223548412323, + "epoch": 2.09, + "learning_rate": 3.9851933771063166e-05, + "loss": 0.1234, + "step": 2196, + "task_loss": 0.06090515851974487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7595898244457306, + "compression_loss": 0.0, + "distillation_loss": 0.21584641933441162, + "epoch": 2.09, + "learning_rate": 3.9843361258301876e-05, + "loss": 0.204, + "step": 2197, + "task_loss": 0.09771460294723511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7596092867229147, + "compression_loss": 0.0, + "distillation_loss": 0.09045465290546417, + "epoch": 2.09, + "learning_rate": 3.983478604917253e-05, + "loss": 0.0834, + "step": 2198, + "task_loss": 0.01959792897105217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7596287427501703, + "compression_loss": 0.0, + "distillation_loss": 0.17018280923366547, + "epoch": 2.09, + "learning_rate": 3.9826208145232855e-05, + "loss": 0.1639, + "step": 2199, + "task_loss": 0.10730506479740143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7596481925285012, + "compression_loss": 0.0, + "distillation_loss": 0.0529235415160656, + "epoch": 2.09, + "learning_rate": 3.981762754804107e-05, + "loss": 0.0491, + "step": 2200, + "task_loss": 0.015178944915533066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7596676360589112, + "compression_loss": 0.0, + "distillation_loss": 0.16582679748535156, + "epoch": 2.09, + "learning_rate": 3.980904425915586e-05, + "loss": 0.157, + "step": 2201, + "task_loss": 0.07799357175827026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7596870733424037, + "compression_loss": 0.0, + "distillation_loss": 0.12228982895612717, + "epoch": 2.09, + "learning_rate": 3.9800458280136453e-05, + "loss": 0.1221, + "step": 2202, + "task_loss": 0.12067455798387527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7597065043799826, + "compression_loss": 0.0, + "distillation_loss": 0.28389573097229004, + "epoch": 2.09, + "learning_rate": 3.979186961254252e-05, + "loss": 0.2762, + "step": 2203, + "task_loss": 0.2065102607011795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7597259291726515, + "compression_loss": 0.0, + "distillation_loss": 0.04436230659484863, + "epoch": 2.09, + "learning_rate": 3.9783278257934233e-05, + "loss": 0.0407, + "step": 2204, + "task_loss": 0.007897831499576569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7597453477214141, + "compression_loss": 0.0, + "distillation_loss": 0.17565417289733887, + "epoch": 2.09, + "learning_rate": 3.977468421787225e-05, + "loss": 0.169, + "step": 2205, + "task_loss": 0.10887274146080017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7597647600272741, + "compression_loss": 0.0, + "distillation_loss": 0.15632234513759613, + "epoch": 2.09, + "learning_rate": 3.976608749391773e-05, + "loss": 0.1454, + "step": 2206, + "task_loss": 0.04727863147854805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7597841660912351, + "compression_loss": 0.0, + "distillation_loss": 0.2122887820005417, + "epoch": 2.1, + "learning_rate": 3.975748808763229e-05, + "loss": 0.2159, + "step": 2207, + "task_loss": 0.24815846979618073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7598035659143009, + "compression_loss": 0.0, + "distillation_loss": 0.2334968000650406, + "epoch": 2.1, + "learning_rate": 3.974888600057808e-05, + "loss": 0.2257, + "step": 2208, + "task_loss": 0.15524537861347198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7598229594974751, + "compression_loss": 0.0, + "distillation_loss": 0.2860737442970276, + "epoch": 2.1, + "learning_rate": 3.974028123431769e-05, + "loss": 0.2848, + "step": 2209, + "task_loss": 0.27306628227233887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7598423468417614, + "compression_loss": 0.0, + "distillation_loss": 0.17595066130161285, + "epoch": 2.1, + "learning_rate": 3.973167379041421e-05, + "loss": 0.186, + "step": 2210, + "task_loss": 0.2765977382659912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7598617279481636, + "compression_loss": 0.0, + "distillation_loss": 0.049867670983076096, + "epoch": 2.1, + "learning_rate": 3.972306367043126e-05, + "loss": 0.0474, + "step": 2211, + "task_loss": 0.02561107836663723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7598811028176852, + "compression_loss": 0.0, + "distillation_loss": 0.13108716905117035, + "epoch": 2.1, + "learning_rate": 3.971445087593288e-05, + "loss": 0.1296, + "step": 2212, + "task_loss": 0.11605143547058105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7599004714513299, + "compression_loss": 0.0, + "distillation_loss": 0.15945231914520264, + "epoch": 2.1, + "learning_rate": 3.970583540848363e-05, + "loss": 0.1644, + "step": 2213, + "task_loss": 0.20883622765541077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7599198338501015, + "compression_loss": 0.0, + "distillation_loss": 0.3196389973163605, + "epoch": 2.1, + "learning_rate": 3.969721726964856e-05, + "loss": 0.3091, + "step": 2214, + "task_loss": 0.21398873627185822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7599391900150037, + "compression_loss": 0.0, + "distillation_loss": 0.30575287342071533, + "epoch": 2.1, + "learning_rate": 3.9688596460993176e-05, + "loss": 0.29, + "step": 2215, + "task_loss": 0.1479155719280243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7599585399470399, + "compression_loss": 0.0, + "distillation_loss": 0.08897826820611954, + "epoch": 2.1, + "learning_rate": 3.967997298408352e-05, + "loss": 0.0882, + "step": 2216, + "task_loss": 0.08128625154495239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7599778836472142, + "compression_loss": 0.0, + "distillation_loss": 0.0864962711930275, + "epoch": 2.11, + "learning_rate": 3.967134684048607e-05, + "loss": 0.0896, + "step": 2217, + "task_loss": 0.11729071289300919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7599972211165299, + "compression_loss": 0.0, + "distillation_loss": 0.23064328730106354, + "epoch": 2.11, + "learning_rate": 3.96627180317678e-05, + "loss": 0.2177, + "step": 2218, + "task_loss": 0.10095411539077759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.760016552355991, + "compression_loss": 0.0, + "distillation_loss": 0.13302667438983917, + "epoch": 2.11, + "learning_rate": 3.965408655949619e-05, + "loss": 0.141, + "step": 2219, + "task_loss": 0.21288727223873138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7600358773666008, + "compression_loss": 0.0, + "distillation_loss": 0.27681732177734375, + "epoch": 2.11, + "learning_rate": 3.964545242523917e-05, + "loss": 0.2902, + "step": 2220, + "task_loss": 0.41104960441589355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7600551961493635, + "compression_loss": 0.0, + "distillation_loss": 0.0774826779961586, + "epoch": 2.11, + "learning_rate": 3.9636815630565194e-05, + "loss": 0.0723, + "step": 2221, + "task_loss": 0.026051845401525497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7600745087052823, + "compression_loss": 0.0, + "distillation_loss": 0.0630701333284378, + "epoch": 2.11, + "learning_rate": 3.962817617704317e-05, + "loss": 0.0647, + "step": 2222, + "task_loss": 0.07926364988088608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7600938150353611, + "compression_loss": 0.0, + "distillation_loss": 0.22707828879356384, + "epoch": 2.11, + "learning_rate": 3.9619534066242485e-05, + "loss": 0.2251, + "step": 2223, + "task_loss": 0.20687554776668549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7601131151406035, + "compression_loss": 0.0, + "distillation_loss": 0.20871593058109283, + "epoch": 2.11, + "learning_rate": 3.961088929973303e-05, + "loss": 0.2074, + "step": 2224, + "task_loss": 0.1955493688583374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7601324090220134, + "compression_loss": 0.0, + "distillation_loss": 0.08324743062257767, + "epoch": 2.11, + "learning_rate": 3.960224187908518e-05, + "loss": 0.079, + "step": 2225, + "task_loss": 0.04101306200027466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7601516966805942, + "compression_loss": 0.0, + "distillation_loss": 0.22678276896476746, + "epoch": 2.11, + "learning_rate": 3.959359180586975e-05, + "loss": 0.236, + "step": 2226, + "task_loss": 0.31925228238105774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7601709781173498, + "compression_loss": 0.0, + "distillation_loss": 0.15277154743671417, + "epoch": 2.11, + "learning_rate": 3.958493908165809e-05, + "loss": 0.1434, + "step": 2227, + "task_loss": 0.05934750288724899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7601902533332837, + "compression_loss": 0.0, + "distillation_loss": 0.13886742293834686, + "epoch": 2.12, + "learning_rate": 3.9576283708022e-05, + "loss": 0.1342, + "step": 2228, + "task_loss": 0.09246313571929932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7602095223293998, + "compression_loss": 0.0, + "distillation_loss": 0.12244449555873871, + "epoch": 2.12, + "learning_rate": 3.956762568653378e-05, + "loss": 0.1165, + "step": 2229, + "task_loss": 0.06263736635446548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7602287851067016, + "compression_loss": 0.0, + "distillation_loss": 0.11867455393075943, + "epoch": 2.12, + "learning_rate": 3.95589650187662e-05, + "loss": 0.1288, + "step": 2230, + "task_loss": 0.21948017179965973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7602480416661928, + "compression_loss": 0.0, + "distillation_loss": 0.17664200067520142, + "epoch": 2.12, + "learning_rate": 3.95503017062925e-05, + "loss": 0.1744, + "step": 2231, + "task_loss": 0.15381231904029846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.760267292008877, + "compression_loss": 0.0, + "distillation_loss": 0.10297991335391998, + "epoch": 2.12, + "learning_rate": 3.954163575068643e-05, + "loss": 0.0943, + "step": 2232, + "task_loss": 0.01588231697678566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7602865361357581, + "compression_loss": 0.0, + "distillation_loss": 0.0686907023191452, + "epoch": 2.12, + "learning_rate": 3.953296715352218e-05, + "loss": 0.0713, + "step": 2233, + "task_loss": 0.09433852881193161 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7603057740478397, + "compression_loss": 0.0, + "distillation_loss": 0.11666402965784073, + "epoch": 2.12, + "learning_rate": 3.952429591637446e-05, + "loss": 0.1239, + "step": 2234, + "task_loss": 0.18875601887702942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7603250057461255, + "compression_loss": 0.0, + "distillation_loss": 0.02542857825756073, + "epoch": 2.12, + "learning_rate": 3.951562204081845e-05, + "loss": 0.0243, + "step": 2235, + "task_loss": 0.014179892838001251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7603442312316191, + "compression_loss": 0.0, + "distillation_loss": 0.024139802902936935, + "epoch": 2.12, + "learning_rate": 3.950694552842977e-05, + "loss": 0.0222, + "step": 2236, + "task_loss": 0.0047469362616539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7603634505053243, + "compression_loss": 0.0, + "distillation_loss": 0.2176014482975006, + "epoch": 2.12, + "learning_rate": 3.949826638078457e-05, + "loss": 0.2075, + "step": 2237, + "task_loss": 0.11611790210008621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7603826635682446, + "compression_loss": 0.0, + "distillation_loss": 0.1716989278793335, + "epoch": 2.13, + "learning_rate": 3.948958459945946e-05, + "loss": 0.1658, + "step": 2238, + "task_loss": 0.11308705061674118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7604018704213837, + "compression_loss": 0.0, + "distillation_loss": 0.3528546392917633, + "epoch": 2.13, + "learning_rate": 3.948090018603153e-05, + "loss": 0.3415, + "step": 2239, + "task_loss": 0.2390662580728531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7604210710657455, + "compression_loss": 0.0, + "distillation_loss": 0.20263050496578217, + "epoch": 2.13, + "learning_rate": 3.947221314207834e-05, + "loss": 0.2039, + "step": 2240, + "task_loss": 0.21529477834701538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7604402655023336, + "compression_loss": 0.0, + "distillation_loss": 0.11417360603809357, + "epoch": 2.13, + "learning_rate": 3.9463523469177935e-05, + "loss": 0.1254, + "step": 2241, + "task_loss": 0.22623518109321594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7604594537321516, + "compression_loss": 0.0, + "distillation_loss": 0.05411672592163086, + "epoch": 2.13, + "learning_rate": 3.9454831168908824e-05, + "loss": 0.0636, + "step": 2242, + "task_loss": 0.14903995394706726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7604786357562032, + "compression_loss": 0.0, + "distillation_loss": 0.09386488795280457, + "epoch": 2.13, + "learning_rate": 3.9446136242850025e-05, + "loss": 0.1017, + "step": 2243, + "task_loss": 0.1717870980501175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7604978115754921, + "compression_loss": 0.0, + "distillation_loss": 0.056300774216651917, + "epoch": 2.13, + "learning_rate": 3.9437438692581e-05, + "loss": 0.0646, + "step": 2244, + "task_loss": 0.1389637589454651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.760516981191022, + "compression_loss": 0.0, + "distillation_loss": 0.0845174565911293, + "epoch": 2.13, + "learning_rate": 3.9428738519681704e-05, + "loss": 0.0873, + "step": 2245, + "task_loss": 0.11270405352115631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7605361446037966, + "compression_loss": 0.0, + "distillation_loss": 0.08052958548069, + "epoch": 2.13, + "learning_rate": 3.942003572573257e-05, + "loss": 0.0794, + "step": 2246, + "task_loss": 0.06879500299692154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7605553018148195, + "compression_loss": 0.0, + "distillation_loss": 0.08333942294120789, + "epoch": 2.13, + "learning_rate": 3.94113303123145e-05, + "loss": 0.0817, + "step": 2247, + "task_loss": 0.06727690994739532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7605744528250944, + "compression_loss": 0.0, + "distillation_loss": 0.11436310410499573, + "epoch": 2.13, + "learning_rate": 3.9402622281008874e-05, + "loss": 0.1101, + "step": 2248, + "task_loss": 0.07128392904996872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7605935976356251, + "compression_loss": 0.0, + "distillation_loss": 0.2693362832069397, + "epoch": 2.14, + "learning_rate": 3.939391163339754e-05, + "loss": 0.2577, + "step": 2249, + "task_loss": 0.1526666134595871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7606127362474151, + "compression_loss": 0.0, + "distillation_loss": 0.07121552526950836, + "epoch": 2.14, + "learning_rate": 3.9385198371062845e-05, + "loss": 0.0725, + "step": 2250, + "task_loss": 0.08450750261545181 + }, + { + "epoch": 2.14, + "eval_accuracy": 0.8818807339449541, + "eval_loss": 0.48909926414489746, + "eval_runtime": 18.0472, + "eval_samples_per_second": 48.318, + "eval_steps_per_second": 6.04, + "step": 2250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7606318686614681, + "compression_loss": 0.0, + "distillation_loss": 0.31588560342788696, + "epoch": 2.14, + "learning_rate": 3.937648249558758e-05, + "loss": 0.3102, + "step": 2251, + "task_loss": 0.25923851132392883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7606509948787881, + "compression_loss": 0.0, + "distillation_loss": 0.0453728511929512, + "epoch": 2.14, + "learning_rate": 3.9367764008555034e-05, + "loss": 0.0592, + "step": 2252, + "task_loss": 0.18407374620437622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7606701149003784, + "compression_loss": 0.0, + "distillation_loss": 0.16821657121181488, + "epoch": 2.14, + "learning_rate": 3.9359042911548955e-05, + "loss": 0.1716, + "step": 2253, + "task_loss": 0.20164065062999725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7606892287272428, + "compression_loss": 0.0, + "distillation_loss": 0.1436726152896881, + "epoch": 2.14, + "learning_rate": 3.935031920615358e-05, + "loss": 0.1497, + "step": 2254, + "task_loss": 0.20403361320495605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.760708336360385, + "compression_loss": 0.0, + "distillation_loss": 0.40433016419410706, + "epoch": 2.14, + "learning_rate": 3.934159289395361e-05, + "loss": 0.3865, + "step": 2255, + "task_loss": 0.22571320831775665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7607274378008088, + "compression_loss": 0.0, + "distillation_loss": 0.1408395618200302, + "epoch": 2.14, + "learning_rate": 3.9332863976534225e-05, + "loss": 0.1363, + "step": 2256, + "task_loss": 0.09514202177524567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7607465330495176, + "compression_loss": 0.0, + "distillation_loss": 0.18185870349407196, + "epoch": 2.14, + "learning_rate": 3.9324132455481064e-05, + "loss": 0.173, + "step": 2257, + "task_loss": 0.09325390309095383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7607656221075153, + "compression_loss": 0.0, + "distillation_loss": 0.1712486296892166, + "epoch": 2.14, + "learning_rate": 3.931539833238026e-05, + "loss": 0.1596, + "step": 2258, + "task_loss": 0.05442361161112785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7607847049758056, + "compression_loss": 0.0, + "distillation_loss": 0.15352007746696472, + "epoch": 2.15, + "learning_rate": 3.930666160881841e-05, + "loss": 0.1534, + "step": 2259, + "task_loss": 0.15225310623645782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7608037816553921, + "compression_loss": 0.0, + "distillation_loss": 0.15940865874290466, + "epoch": 2.15, + "learning_rate": 3.9297922286382573e-05, + "loss": 0.1447, + "step": 2260, + "task_loss": 0.012211665511131287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7608228521472785, + "compression_loss": 0.0, + "distillation_loss": 0.03598247095942497, + "epoch": 2.15, + "learning_rate": 3.928918036666029e-05, + "loss": 0.0342, + "step": 2261, + "task_loss": 0.01777983456850052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7608419164524685, + "compression_loss": 0.0, + "distillation_loss": 0.17762696743011475, + "epoch": 2.15, + "learning_rate": 3.928043585123957e-05, + "loss": 0.1757, + "step": 2262, + "task_loss": 0.15850163996219635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7608609745719657, + "compression_loss": 0.0, + "distillation_loss": 0.0662430077791214, + "epoch": 2.15, + "learning_rate": 3.927168874170891e-05, + "loss": 0.0623, + "step": 2263, + "task_loss": 0.026589026674628258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7608800265067739, + "compression_loss": 0.0, + "distillation_loss": 0.07591673731803894, + "epoch": 2.15, + "learning_rate": 3.926293903965726e-05, + "loss": 0.0699, + "step": 2264, + "task_loss": 0.01550477184355259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7608990722578967, + "compression_loss": 0.0, + "distillation_loss": 0.11433359235525131, + "epoch": 2.15, + "learning_rate": 3.925418674667405e-05, + "loss": 0.1113, + "step": 2265, + "task_loss": 0.08434649556875229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7609181118263377, + "compression_loss": 0.0, + "distillation_loss": 0.1352238655090332, + "epoch": 2.15, + "learning_rate": 3.924543186434915e-05, + "loss": 0.1273, + "step": 2266, + "task_loss": 0.05604308471083641 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7609371452131007, + "compression_loss": 0.0, + "distillation_loss": 0.17770247161388397, + "epoch": 2.15, + "learning_rate": 3.923667439427295e-05, + "loss": 0.1764, + "step": 2267, + "task_loss": 0.1651407778263092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7609561724191896, + "compression_loss": 0.0, + "distillation_loss": 0.06021641939878464, + "epoch": 2.15, + "learning_rate": 3.922791433803629e-05, + "loss": 0.0652, + "step": 2268, + "task_loss": 0.1097673624753952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7609751934456076, + "compression_loss": 0.0, + "distillation_loss": 0.057901740074157715, + "epoch": 2.15, + "learning_rate": 3.921915169723046e-05, + "loss": 0.0638, + "step": 2269, + "task_loss": 0.11656754463911057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7609942082933588, + "compression_loss": 0.0, + "distillation_loss": 0.12669652700424194, + "epoch": 2.16, + "learning_rate": 3.921038647344725e-05, + "loss": 0.1198, + "step": 2270, + "task_loss": 0.057524219155311584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7610132169634467, + "compression_loss": 0.0, + "distillation_loss": 0.11540260910987854, + "epoch": 2.16, + "learning_rate": 3.920161866827889e-05, + "loss": 0.112, + "step": 2271, + "task_loss": 0.08180786669254303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7610322194568749, + "compression_loss": 0.0, + "distillation_loss": 0.08194451034069061, + "epoch": 2.16, + "learning_rate": 3.9192848283318114e-05, + "loss": 0.0761, + "step": 2272, + "task_loss": 0.023745771497488022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7610512157746473, + "compression_loss": 0.0, + "distillation_loss": 0.09653206914663315, + "epoch": 2.16, + "learning_rate": 3.91840753201581e-05, + "loss": 0.0989, + "step": 2273, + "task_loss": 0.11974017322063446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7610702059177674, + "compression_loss": 0.0, + "distillation_loss": 0.17683292925357819, + "epoch": 2.16, + "learning_rate": 3.917529978039247e-05, + "loss": 0.1677, + "step": 2274, + "task_loss": 0.08567538112401962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761089189887239, + "compression_loss": 0.0, + "distillation_loss": 0.15673840045928955, + "epoch": 2.16, + "learning_rate": 3.9166521665615386e-05, + "loss": 0.1618, + "step": 2275, + "task_loss": 0.20756980776786804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7611081676840656, + "compression_loss": 0.0, + "distillation_loss": 0.16288813948631287, + "epoch": 2.16, + "learning_rate": 3.91577409774214e-05, + "loss": 0.164, + "step": 2276, + "task_loss": 0.17406190931797028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7611271393092511, + "compression_loss": 0.0, + "distillation_loss": 0.09783841669559479, + "epoch": 2.16, + "learning_rate": 3.9148957717405596e-05, + "loss": 0.0942, + "step": 2277, + "task_loss": 0.061522744596004486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7611461047637991, + "compression_loss": 0.0, + "distillation_loss": 0.1405646800994873, + "epoch": 2.16, + "learning_rate": 3.914017188716347e-05, + "loss": 0.1331, + "step": 2278, + "task_loss": 0.06549764424562454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7611650640487132, + "compression_loss": 0.0, + "distillation_loss": 0.09124282747507095, + "epoch": 2.16, + "learning_rate": 3.913138348829102e-05, + "loss": 0.0917, + "step": 2279, + "task_loss": 0.09613334387540817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7611840171649973, + "compression_loss": 0.0, + "distillation_loss": 0.1638939529657364, + "epoch": 2.17, + "learning_rate": 3.91225925223847e-05, + "loss": 0.161, + "step": 2280, + "task_loss": 0.13536801934242249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7612029641136548, + "compression_loss": 0.0, + "distillation_loss": 0.0714389756321907, + "epoch": 2.17, + "learning_rate": 3.911379899104144e-05, + "loss": 0.0746, + "step": 2281, + "task_loss": 0.10342606157064438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7612219048956895, + "compression_loss": 0.0, + "distillation_loss": 0.1365533024072647, + "epoch": 2.17, + "learning_rate": 3.910500289585862e-05, + "loss": 0.142, + "step": 2282, + "task_loss": 0.19071495532989502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7612408395121052, + "compression_loss": 0.0, + "distillation_loss": 0.04966387897729874, + "epoch": 2.17, + "learning_rate": 3.90962042384341e-05, + "loss": 0.0503, + "step": 2283, + "task_loss": 0.05590268224477768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7612597679639055, + "compression_loss": 0.0, + "distillation_loss": 0.03692065179347992, + "epoch": 2.17, + "learning_rate": 3.908740302036618e-05, + "loss": 0.0443, + "step": 2284, + "task_loss": 0.110334612429142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761278690252094, + "compression_loss": 0.0, + "distillation_loss": 0.10264438390731812, + "epoch": 2.17, + "learning_rate": 3.907859924325366e-05, + "loss": 0.1084, + "step": 2285, + "task_loss": 0.16008785367012024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7612976063776745, + "compression_loss": 0.0, + "distillation_loss": 0.11101731657981873, + "epoch": 2.17, + "learning_rate": 3.906979290869578e-05, + "loss": 0.105, + "step": 2286, + "task_loss": 0.05065052583813667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7613165163416505, + "compression_loss": 0.0, + "distillation_loss": 0.14465484023094177, + "epoch": 2.17, + "learning_rate": 3.9060984018292267e-05, + "loss": 0.1474, + "step": 2287, + "task_loss": 0.17257431149482727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761335420145026, + "compression_loss": 0.0, + "distillation_loss": 0.30154234170913696, + "epoch": 2.17, + "learning_rate": 3.905217257364328e-05, + "loss": 0.2859, + "step": 2288, + "task_loss": 0.14557313919067383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7613543177888044, + "compression_loss": 0.0, + "distillation_loss": 0.09943203628063202, + "epoch": 2.17, + "learning_rate": 3.904335857634948e-05, + "loss": 0.1025, + "step": 2289, + "task_loss": 0.12985821068286896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7613732092739895, + "compression_loss": 0.0, + "distillation_loss": 0.08901150524616241, + "epoch": 2.17, + "learning_rate": 3.9034542028011944e-05, + "loss": 0.0838, + "step": 2290, + "task_loss": 0.03706664592027664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761392094601585, + "compression_loss": 0.0, + "distillation_loss": 0.01898990385234356, + "epoch": 2.18, + "learning_rate": 3.902572293023227e-05, + "loss": 0.0269, + "step": 2291, + "task_loss": 0.09843745082616806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7614109737725945, + "compression_loss": 0.0, + "distillation_loss": 0.04945681244134903, + "epoch": 2.18, + "learning_rate": 3.9016901284612474e-05, + "loss": 0.046, + "step": 2292, + "task_loss": 0.015036612749099731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7614298467880216, + "compression_loss": 0.0, + "distillation_loss": 0.07033106684684753, + "epoch": 2.18, + "learning_rate": 3.9008077092755055e-05, + "loss": 0.0685, + "step": 2293, + "task_loss": 0.05228840187191963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7614487136488702, + "compression_loss": 0.0, + "distillation_loss": 0.02366098389029503, + "epoch": 2.18, + "learning_rate": 3.899925035626296e-05, + "loss": 0.0333, + "step": 2294, + "task_loss": 0.1205047145485878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761467574356144, + "compression_loss": 0.0, + "distillation_loss": 0.11169403791427612, + "epoch": 2.18, + "learning_rate": 3.899042107673962e-05, + "loss": 0.1119, + "step": 2295, + "task_loss": 0.11369898915290833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7614864289108464, + "compression_loss": 0.0, + "distillation_loss": 0.039377085864543915, + "epoch": 2.18, + "learning_rate": 3.898158925578893e-05, + "loss": 0.0445, + "step": 2296, + "task_loss": 0.09109679609537125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7615052773139814, + "compression_loss": 0.0, + "distillation_loss": 0.32753410935401917, + "epoch": 2.18, + "learning_rate": 3.89727548950152e-05, + "loss": 0.3187, + "step": 2297, + "task_loss": 0.23894454538822174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7615241195665524, + "compression_loss": 0.0, + "distillation_loss": 0.13404498994350433, + "epoch": 2.18, + "learning_rate": 3.8963917996023245e-05, + "loss": 0.1353, + "step": 2298, + "task_loss": 0.14633318781852722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7615429556695633, + "compression_loss": 0.0, + "distillation_loss": 0.11489589512348175, + "epoch": 2.18, + "learning_rate": 3.8955078560418345e-05, + "loss": 0.1183, + "step": 2299, + "task_loss": 0.14893510937690735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7615617856240177, + "compression_loss": 0.0, + "distillation_loss": 0.02402464672923088, + "epoch": 2.18, + "learning_rate": 3.894623658980622e-05, + "loss": 0.0309, + "step": 2300, + "task_loss": 0.09274609386920929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7615806094309192, + "compression_loss": 0.0, + "distillation_loss": 0.12553593516349792, + "epoch": 2.19, + "learning_rate": 3.8937392085793036e-05, + "loss": 0.1179, + "step": 2301, + "task_loss": 0.04914803430438042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7615994270912717, + "compression_loss": 0.0, + "distillation_loss": 0.03810520097613335, + "epoch": 2.19, + "learning_rate": 3.892854504998546e-05, + "loss": 0.0352, + "step": 2302, + "task_loss": 0.008859049528837204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7616182386060786, + "compression_loss": 0.0, + "distillation_loss": 0.10612446069717407, + "epoch": 2.19, + "learning_rate": 3.891969548399061e-05, + "loss": 0.1083, + "step": 2303, + "task_loss": 0.12744131684303284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7616370439763438, + "compression_loss": 0.0, + "distillation_loss": 0.08019978553056717, + "epoch": 2.19, + "learning_rate": 3.891084338941603e-05, + "loss": 0.0734, + "step": 2304, + "task_loss": 0.012498440220952034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7616558432030709, + "compression_loss": 0.0, + "distillation_loss": 0.05349978804588318, + "epoch": 2.19, + "learning_rate": 3.8901988767869744e-05, + "loss": 0.0752, + "step": 2305, + "task_loss": 0.2700839042663574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7616746362872636, + "compression_loss": 0.0, + "distillation_loss": 0.052905574440956116, + "epoch": 2.19, + "learning_rate": 3.8893131620960254e-05, + "loss": 0.0546, + "step": 2306, + "task_loss": 0.06945595890283585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7616934232299255, + "compression_loss": 0.0, + "distillation_loss": 0.13802888989448547, + "epoch": 2.19, + "learning_rate": 3.88842719502965e-05, + "loss": 0.1321, + "step": 2307, + "task_loss": 0.07853611558675766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7617122040320605, + "compression_loss": 0.0, + "distillation_loss": 0.06617145985364914, + "epoch": 2.19, + "learning_rate": 3.887540975748787e-05, + "loss": 0.0631, + "step": 2308, + "task_loss": 0.03529322147369385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761730978694672, + "compression_loss": 0.0, + "distillation_loss": 0.14270713925361633, + "epoch": 2.19, + "learning_rate": 3.8866545044144234e-05, + "loss": 0.1358, + "step": 2309, + "task_loss": 0.07395662367343903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7617497472187639, + "compression_loss": 0.0, + "distillation_loss": 0.12063492834568024, + "epoch": 2.19, + "learning_rate": 3.885767781187591e-05, + "loss": 0.1182, + "step": 2310, + "task_loss": 0.09611006081104279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7617685096053398, + "compression_loss": 0.0, + "distillation_loss": 0.15884768962860107, + "epoch": 2.19, + "learning_rate": 3.884880806229367e-05, + "loss": 0.1541, + "step": 2311, + "task_loss": 0.1109381914138794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7617872658554034, + "compression_loss": 0.0, + "distillation_loss": 0.2637111246585846, + "epoch": 2.2, + "learning_rate": 3.883993579700875e-05, + "loss": 0.2475, + "step": 2312, + "task_loss": 0.10145562142133713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7618060159699582, + "compression_loss": 0.0, + "distillation_loss": 0.09027013182640076, + "epoch": 2.2, + "learning_rate": 3.883106101763285e-05, + "loss": 0.0899, + "step": 2313, + "task_loss": 0.08697813749313354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7618247599500082, + "compression_loss": 0.0, + "distillation_loss": 0.10050283372402191, + "epoch": 2.2, + "learning_rate": 3.882218372577809e-05, + "loss": 0.0967, + "step": 2314, + "task_loss": 0.062062062323093414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7618434977965569, + "compression_loss": 0.0, + "distillation_loss": 0.21101830899715424, + "epoch": 2.2, + "learning_rate": 3.881330392305709e-05, + "loss": 0.2235, + "step": 2315, + "task_loss": 0.3360675871372223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.761862229510608, + "compression_loss": 0.0, + "distillation_loss": 0.20589599013328552, + "epoch": 2.2, + "learning_rate": 3.8804421611082916e-05, + "loss": 0.1933, + "step": 2316, + "task_loss": 0.07991321384906769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7618809550931652, + "compression_loss": 0.0, + "distillation_loss": 0.1543605923652649, + "epoch": 2.2, + "learning_rate": 3.8795536791469066e-05, + "loss": 0.1454, + "step": 2317, + "task_loss": 0.06446463614702225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7618996745452322, + "compression_loss": 0.0, + "distillation_loss": 0.09525361657142639, + "epoch": 2.2, + "learning_rate": 3.8786649465829516e-05, + "loss": 0.0921, + "step": 2318, + "task_loss": 0.0634106770157814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7619183878678126, + "compression_loss": 0.0, + "distillation_loss": 0.041672758758068085, + "epoch": 2.2, + "learning_rate": 3.8777759635778696e-05, + "loss": 0.0385, + "step": 2319, + "task_loss": 0.009624077007174492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7619370950619102, + "compression_loss": 0.0, + "distillation_loss": 0.13709776103496552, + "epoch": 2.2, + "learning_rate": 3.876886730293149e-05, + "loss": 0.1341, + "step": 2320, + "task_loss": 0.10736387968063354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7619557961285286, + "compression_loss": 0.0, + "distillation_loss": 0.0763949453830719, + "epoch": 2.2, + "learning_rate": 3.8759972468903215e-05, + "loss": 0.0731, + "step": 2321, + "task_loss": 0.04355807602405548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7619744910686715, + "compression_loss": 0.0, + "distillation_loss": 0.08063163608312607, + "epoch": 2.21, + "learning_rate": 3.875107513530968e-05, + "loss": 0.0841, + "step": 2322, + "task_loss": 0.11514770984649658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7619931798833426, + "compression_loss": 0.0, + "distillation_loss": 0.05136069282889366, + "epoch": 2.21, + "learning_rate": 3.874217530376711e-05, + "loss": 0.0527, + "step": 2323, + "task_loss": 0.06518974155187607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7620118625735456, + "compression_loss": 0.0, + "distillation_loss": 0.045473020523786545, + "epoch": 2.21, + "learning_rate": 3.873327297589223e-05, + "loss": 0.0417, + "step": 2324, + "task_loss": 0.007644519209861755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7620305391402841, + "compression_loss": 0.0, + "distillation_loss": 0.033313095569610596, + "epoch": 2.21, + "learning_rate": 3.8724368153302166e-05, + "loss": 0.0488, + "step": 2325, + "task_loss": 0.18855777382850647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7620492095845618, + "compression_loss": 0.0, + "distillation_loss": 0.23782016336917877, + "epoch": 2.21, + "learning_rate": 3.871546083761453e-05, + "loss": 0.2333, + "step": 2326, + "task_loss": 0.19272853434085846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7620678739073825, + "compression_loss": 0.0, + "distillation_loss": 0.10806293785572052, + "epoch": 2.21, + "learning_rate": 3.870655103044738e-05, + "loss": 0.1051, + "step": 2327, + "task_loss": 0.07805277407169342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7620865321097497, + "compression_loss": 0.0, + "distillation_loss": 0.1450902819633484, + "epoch": 2.21, + "learning_rate": 3.8697638733419216e-05, + "loss": 0.1578, + "step": 2328, + "task_loss": 0.2723790109157562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7621051841926673, + "compression_loss": 0.0, + "distillation_loss": 0.09044960141181946, + "epoch": 2.21, + "learning_rate": 3.8688723948149014e-05, + "loss": 0.0879, + "step": 2329, + "task_loss": 0.0644555538892746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7621238301571387, + "compression_loss": 0.0, + "distillation_loss": 0.036163873970508575, + "epoch": 2.21, + "learning_rate": 3.867980667625618e-05, + "loss": 0.0502, + "step": 2330, + "task_loss": 0.17629528045654297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7621424700041679, + "compression_loss": 0.0, + "distillation_loss": 0.08255277574062347, + "epoch": 2.21, + "learning_rate": 3.867088691936058e-05, + "loss": 0.0778, + "step": 2331, + "task_loss": 0.03511197119951248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7621611037347583, + "compression_loss": 0.0, + "distillation_loss": 0.1695278137922287, + "epoch": 2.21, + "learning_rate": 3.8661964679082535e-05, + "loss": 0.1618, + "step": 2332, + "task_loss": 0.09199361503124237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7621797313499138, + "compression_loss": 0.0, + "distillation_loss": 0.052597202360630035, + "epoch": 2.22, + "learning_rate": 3.8653039957042806e-05, + "loss": 0.0562, + "step": 2333, + "task_loss": 0.08904554694890976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.762198352850638, + "compression_loss": 0.0, + "distillation_loss": 0.154799684882164, + "epoch": 2.22, + "learning_rate": 3.8644112754862614e-05, + "loss": 0.1498, + "step": 2334, + "task_loss": 0.10432252287864685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7622169682379345, + "compression_loss": 0.0, + "distillation_loss": 0.06855852156877518, + "epoch": 2.22, + "learning_rate": 3.8635183074163636e-05, + "loss": 0.0654, + "step": 2335, + "task_loss": 0.03740303963422775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7622355775128071, + "compression_loss": 0.0, + "distillation_loss": 0.031943485140800476, + "epoch": 2.22, + "learning_rate": 3.862625091656797e-05, + "loss": 0.039, + "step": 2336, + "task_loss": 0.1027441918849945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7622541806762594, + "compression_loss": 0.0, + "distillation_loss": 0.08677913248538971, + "epoch": 2.22, + "learning_rate": 3.861731628369822e-05, + "loss": 0.0888, + "step": 2337, + "task_loss": 0.10726401954889297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7622727777292951, + "compression_loss": 0.0, + "distillation_loss": 0.03856203332543373, + "epoch": 2.22, + "learning_rate": 3.8608379177177375e-05, + "loss": 0.0354, + "step": 2338, + "task_loss": 0.0071256812661886215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7622913686729179, + "compression_loss": 0.0, + "distillation_loss": 0.04021540284156799, + "epoch": 2.22, + "learning_rate": 3.8599439598628916e-05, + "loss": 0.0368, + "step": 2339, + "task_loss": 0.005587218329310417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7623099535081316, + "compression_loss": 0.0, + "distillation_loss": 0.035530779510736465, + "epoch": 2.22, + "learning_rate": 3.8590497549676753e-05, + "loss": 0.0384, + "step": 2340, + "task_loss": 0.06408775597810745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7623285322359397, + "compression_loss": 0.0, + "distillation_loss": 0.16440139710903168, + "epoch": 2.22, + "learning_rate": 3.858155303194526e-05, + "loss": 0.1682, + "step": 2341, + "task_loss": 0.20209884643554688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7623471048573459, + "compression_loss": 0.0, + "distillation_loss": 0.09271198511123657, + "epoch": 2.22, + "learning_rate": 3.8572606047059254e-05, + "loss": 0.0869, + "step": 2342, + "task_loss": 0.03472534567117691 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.762365671373354, + "compression_loss": 0.0, + "distillation_loss": 0.22541561722755432, + "epoch": 2.23, + "learning_rate": 3.856365659664399e-05, + "loss": 0.2111, + "step": 2343, + "task_loss": 0.08197568356990814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7623842317849675, + "compression_loss": 0.0, + "distillation_loss": 0.1988585889339447, + "epoch": 2.23, + "learning_rate": 3.855470468232518e-05, + "loss": 0.2026, + "step": 2344, + "task_loss": 0.23670420050621033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7624027860931902, + "compression_loss": 0.0, + "distillation_loss": 0.08545896410942078, + "epoch": 2.23, + "learning_rate": 3.854575030572898e-05, + "loss": 0.0809, + "step": 2345, + "task_loss": 0.040093451738357544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7624213342990259, + "compression_loss": 0.0, + "distillation_loss": 0.11442442238330841, + "epoch": 2.23, + "learning_rate": 3.853679346848201e-05, + "loss": 0.1172, + "step": 2346, + "task_loss": 0.14235654473304749 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7624398764034781, + "compression_loss": 0.0, + "distillation_loss": 0.08486449718475342, + "epoch": 2.23, + "learning_rate": 3.8527834172211306e-05, + "loss": 0.0992, + "step": 2347, + "task_loss": 0.22856228053569794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7624584124075505, + "compression_loss": 0.0, + "distillation_loss": 0.13581222295761108, + "epoch": 2.23, + "learning_rate": 3.851887241854438e-05, + "loss": 0.129, + "step": 2348, + "task_loss": 0.06761342287063599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7624769423122468, + "compression_loss": 0.0, + "distillation_loss": 0.3619718551635742, + "epoch": 2.23, + "learning_rate": 3.850990820910917e-05, + "loss": 0.3528, + "step": 2349, + "task_loss": 0.27037566900253296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7624954661185708, + "compression_loss": 0.0, + "distillation_loss": 0.1374235451221466, + "epoch": 2.23, + "learning_rate": 3.8500941545534065e-05, + "loss": 0.137, + "step": 2350, + "task_loss": 0.13285070657730103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.762513983827526, + "compression_loss": 0.0, + "distillation_loss": 0.03058495558798313, + "epoch": 2.23, + "learning_rate": 3.849197242944791e-05, + "loss": 0.0281, + "step": 2351, + "task_loss": 0.0058120060712099075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7625324954401163, + "compression_loss": 0.0, + "distillation_loss": 0.26459288597106934, + "epoch": 2.23, + "learning_rate": 3.8483000862479986e-05, + "loss": 0.2585, + "step": 2352, + "task_loss": 0.2032066434621811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7625510009573451, + "compression_loss": 0.0, + "distillation_loss": 0.11497370898723602, + "epoch": 2.23, + "learning_rate": 3.8474026846260015e-05, + "loss": 0.1074, + "step": 2353, + "task_loss": 0.039116568863391876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7625695003802162, + "compression_loss": 0.0, + "distillation_loss": 0.10177917033433914, + "epoch": 2.24, + "learning_rate": 3.846505038241818e-05, + "loss": 0.098, + "step": 2354, + "task_loss": 0.0641433447599411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7625879937097334, + "compression_loss": 0.0, + "distillation_loss": 0.1023842841386795, + "epoch": 2.24, + "learning_rate": 3.84560714725851e-05, + "loss": 0.1054, + "step": 2355, + "task_loss": 0.13256244361400604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7626064809469003, + "compression_loss": 0.0, + "distillation_loss": 0.21765132248401642, + "epoch": 2.24, + "learning_rate": 3.8447090118391814e-05, + "loss": 0.2109, + "step": 2356, + "task_loss": 0.14984160661697388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7626249620927206, + "compression_loss": 0.0, + "distillation_loss": 0.10686811059713364, + "epoch": 2.24, + "learning_rate": 3.8438106321469864e-05, + "loss": 0.1046, + "step": 2357, + "task_loss": 0.08399415016174316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7626434371481979, + "compression_loss": 0.0, + "distillation_loss": 0.06657497584819794, + "epoch": 2.24, + "learning_rate": 3.842912008345117e-05, + "loss": 0.0629, + "step": 2358, + "task_loss": 0.029489833861589432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.762661906114336, + "compression_loss": 0.0, + "distillation_loss": 0.17578309774398804, + "epoch": 2.24, + "learning_rate": 3.842013140596815e-05, + "loss": 0.1783, + "step": 2359, + "task_loss": 0.20061953365802765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7626803689921385, + "compression_loss": 0.0, + "distillation_loss": 0.08999986201524734, + "epoch": 2.24, + "learning_rate": 3.841114029065362e-05, + "loss": 0.1037, + "step": 2360, + "task_loss": 0.22721917927265167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7626988257826091, + "compression_loss": 0.0, + "distillation_loss": 0.18849384784698486, + "epoch": 2.24, + "learning_rate": 3.8402146739140874e-05, + "loss": 0.1817, + "step": 2361, + "task_loss": 0.12036500871181488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7627172764867515, + "compression_loss": 0.0, + "distillation_loss": 0.03334838151931763, + "epoch": 2.24, + "learning_rate": 3.8393150753063614e-05, + "loss": 0.0326, + "step": 2362, + "task_loss": 0.025991789996623993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7627357211055693, + "compression_loss": 0.0, + "distillation_loss": 0.09625150263309479, + "epoch": 2.24, + "learning_rate": 3.838415233405603e-05, + "loss": 0.1046, + "step": 2363, + "task_loss": 0.17937156558036804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7627541596400663, + "compression_loss": 0.0, + "distillation_loss": 0.07551825791597366, + "epoch": 2.25, + "learning_rate": 3.837515148375271e-05, + "loss": 0.0853, + "step": 2364, + "task_loss": 0.17311903834342957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7627725920912461, + "compression_loss": 0.0, + "distillation_loss": 0.06278804689645767, + "epoch": 2.25, + "learning_rate": 3.836614820378871e-05, + "loss": 0.0712, + "step": 2365, + "task_loss": 0.14707757532596588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7627910184601125, + "compression_loss": 0.0, + "distillation_loss": 0.045216597616672516, + "epoch": 2.25, + "learning_rate": 3.835714249579952e-05, + "loss": 0.0581, + "step": 2366, + "task_loss": 0.1742611825466156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.762809438747669, + "compression_loss": 0.0, + "distillation_loss": 0.28981393575668335, + "epoch": 2.25, + "learning_rate": 3.8348134361421064e-05, + "loss": 0.2989, + "step": 2367, + "task_loss": 0.38077259063720703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7628278529549195, + "compression_loss": 0.0, + "distillation_loss": 0.0407429076731205, + "epoch": 2.25, + "learning_rate": 3.8339123802289716e-05, + "loss": 0.037, + "step": 2368, + "task_loss": 0.0035578403621912003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7628462610828675, + "compression_loss": 0.0, + "distillation_loss": 0.08070795238018036, + "epoch": 2.25, + "learning_rate": 3.8330110820042285e-05, + "loss": 0.0862, + "step": 2369, + "task_loss": 0.1353619545698166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7628646631325168, + "compression_loss": 0.0, + "distillation_loss": 0.05324525758624077, + "epoch": 2.25, + "learning_rate": 3.8321095416316024e-05, + "loss": 0.0695, + "step": 2370, + "task_loss": 0.21613724529743195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.762883059104871, + "compression_loss": 0.0, + "distillation_loss": 0.04425017535686493, + "epoch": 2.25, + "learning_rate": 3.831207759274863e-05, + "loss": 0.0407, + "step": 2371, + "task_loss": 0.008313026279211044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7629014490009337, + "compression_loss": 0.0, + "distillation_loss": 0.07859969139099121, + "epoch": 2.25, + "learning_rate": 3.8303057350978224e-05, + "loss": 0.1042, + "step": 2372, + "task_loss": 0.3349103033542633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7629198328217088, + "compression_loss": 0.0, + "distillation_loss": 0.09541267156600952, + "epoch": 2.25, + "learning_rate": 3.829403469264339e-05, + "loss": 0.0935, + "step": 2373, + "task_loss": 0.07653743773698807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7629382105681998, + "compression_loss": 0.0, + "distillation_loss": 0.10529537498950958, + "epoch": 2.25, + "learning_rate": 3.828500961938313e-05, + "loss": 0.0988, + "step": 2374, + "task_loss": 0.040317974984645844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7629565822414105, + "compression_loss": 0.0, + "distillation_loss": 0.050773873925209045, + "epoch": 2.26, + "learning_rate": 3.827598213283688e-05, + "loss": 0.0477, + "step": 2375, + "task_loss": 0.019673151895403862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7629749478423445, + "compression_loss": 0.0, + "distillation_loss": 0.06847432255744934, + "epoch": 2.26, + "learning_rate": 3.8266952234644545e-05, + "loss": 0.0684, + "step": 2376, + "task_loss": 0.06723335385322571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7629933073720057, + "compression_loss": 0.0, + "distillation_loss": 0.1094050258398056, + "epoch": 2.26, + "learning_rate": 3.825791992644644e-05, + "loss": 0.1155, + "step": 2377, + "task_loss": 0.17028164863586426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7630116608313974, + "compression_loss": 0.0, + "distillation_loss": 0.04125671088695526, + "epoch": 2.26, + "learning_rate": 3.824888520988333e-05, + "loss": 0.038, + "step": 2378, + "task_loss": 0.008307870477437973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7630300082215236, + "compression_loss": 0.0, + "distillation_loss": 0.0585186630487442, + "epoch": 2.26, + "learning_rate": 3.823984808659641e-05, + "loss": 0.0588, + "step": 2379, + "task_loss": 0.06130526214838028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7630483495433877, + "compression_loss": 0.0, + "distillation_loss": 0.03627898544073105, + "epoch": 2.26, + "learning_rate": 3.8230808558227335e-05, + "loss": 0.0331, + "step": 2380, + "task_loss": 0.004686055704951286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7630666847979937, + "compression_loss": 0.0, + "distillation_loss": 0.04550597444176674, + "epoch": 2.26, + "learning_rate": 3.8221766626418155e-05, + "loss": 0.0478, + "step": 2381, + "task_loss": 0.06875459849834442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.763085013986345, + "compression_loss": 0.0, + "distillation_loss": 0.057246215641498566, + "epoch": 2.26, + "learning_rate": 3.821272229281139e-05, + "loss": 0.0576, + "step": 2382, + "task_loss": 0.06067638099193573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7631033371094456, + "compression_loss": 0.0, + "distillation_loss": 0.17733870446681976, + "epoch": 2.26, + "learning_rate": 3.820367555904999e-05, + "loss": 0.1736, + "step": 2383, + "task_loss": 0.13981691002845764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7631216541682988, + "compression_loss": 0.0, + "distillation_loss": 0.14336426556110382, + "epoch": 2.26, + "learning_rate": 3.819462642677733e-05, + "loss": 0.1475, + "step": 2384, + "task_loss": 0.184513121843338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7631399651639086, + "compression_loss": 0.0, + "distillation_loss": 0.1907598078250885, + "epoch": 2.26, + "learning_rate": 3.818557489763724e-05, + "loss": 0.1909, + "step": 2385, + "task_loss": 0.19197434186935425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7631582700972785, + "compression_loss": 0.0, + "distillation_loss": 0.09918617457151413, + "epoch": 2.27, + "learning_rate": 3.817652097327396e-05, + "loss": 0.0916, + "step": 2386, + "task_loss": 0.023263249546289444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7631765689694123, + "compression_loss": 0.0, + "distillation_loss": 0.0876680240035057, + "epoch": 2.27, + "learning_rate": 3.81674646553322e-05, + "loss": 0.0851, + "step": 2387, + "task_loss": 0.06229857727885246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7631948617813136, + "compression_loss": 0.0, + "distillation_loss": 0.09220877289772034, + "epoch": 2.27, + "learning_rate": 3.815840594545706e-05, + "loss": 0.0941, + "step": 2388, + "task_loss": 0.11108911037445068 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.763213148533986, + "compression_loss": 0.0, + "distillation_loss": 0.10177421569824219, + "epoch": 2.27, + "learning_rate": 3.814934484529411e-05, + "loss": 0.1073, + "step": 2389, + "task_loss": 0.157059907913208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7632314292284335, + "compression_loss": 0.0, + "distillation_loss": 0.05992235243320465, + "epoch": 2.27, + "learning_rate": 3.8140281356489346e-05, + "loss": 0.0608, + "step": 2390, + "task_loss": 0.06858600676059723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7632497038656594, + "compression_loss": 0.0, + "distillation_loss": 0.0574650838971138, + "epoch": 2.27, + "learning_rate": 3.8131215480689184e-05, + "loss": 0.0586, + "step": 2391, + "task_loss": 0.06853343546390533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7632679724466676, + "compression_loss": 0.0, + "distillation_loss": 0.21620208024978638, + "epoch": 2.27, + "learning_rate": 3.812214721954049e-05, + "loss": 0.2103, + "step": 2392, + "task_loss": 0.15719348192214966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7632862349724616, + "compression_loss": 0.0, + "distillation_loss": 0.17695803940296173, + "epoch": 2.27, + "learning_rate": 3.811307657469055e-05, + "loss": 0.1758, + "step": 2393, + "task_loss": 0.16526271402835846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7633044914440454, + "compression_loss": 0.0, + "distillation_loss": 0.05041082203388214, + "epoch": 2.27, + "learning_rate": 3.8104003547787105e-05, + "loss": 0.0526, + "step": 2394, + "task_loss": 0.07256819307804108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7633227418624224, + "compression_loss": 0.0, + "distillation_loss": 0.03695048391819, + "epoch": 2.27, + "learning_rate": 3.809492814047831e-05, + "loss": 0.0389, + "step": 2395, + "task_loss": 0.05642509087920189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7633409862285965, + "compression_loss": 0.0, + "distillation_loss": 0.1277005672454834, + "epoch": 2.28, + "learning_rate": 3.8085850354412745e-05, + "loss": 0.1204, + "step": 2396, + "task_loss": 0.05449753627181053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7633592245435712, + "compression_loss": 0.0, + "distillation_loss": 0.1139630377292633, + "epoch": 2.28, + "learning_rate": 3.807677019123944e-05, + "loss": 0.1069, + "step": 2397, + "task_loss": 0.042952507734298706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7633774568083502, + "compression_loss": 0.0, + "distillation_loss": 0.24451753497123718, + "epoch": 2.28, + "learning_rate": 3.806768765260785e-05, + "loss": 0.2416, + "step": 2398, + "task_loss": 0.2154046595096588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7633956830239372, + "compression_loss": 0.0, + "distillation_loss": 0.20844148099422455, + "epoch": 2.28, + "learning_rate": 3.805860274016787e-05, + "loss": 0.2083, + "step": 2399, + "task_loss": 0.20691928267478943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.763413903191336, + "compression_loss": 0.0, + "distillation_loss": 0.3789367079734802, + "epoch": 2.28, + "learning_rate": 3.8049515455569816e-05, + "loss": 0.3638, + "step": 2400, + "task_loss": 0.22742155194282532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7634321173115501, + "compression_loss": 0.0, + "distillation_loss": 0.18300700187683105, + "epoch": 2.28, + "learning_rate": 3.804042580046442e-05, + "loss": 0.1727, + "step": 2401, + "task_loss": 0.08020967990159988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7634503253855833, + "compression_loss": 0.0, + "distillation_loss": 0.14992451667785645, + "epoch": 2.28, + "learning_rate": 3.803133377650288e-05, + "loss": 0.1407, + "step": 2402, + "task_loss": 0.05811336264014244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7634685274144393, + "compression_loss": 0.0, + "distillation_loss": 0.20229187607765198, + "epoch": 2.28, + "learning_rate": 3.80222393853368e-05, + "loss": 0.1983, + "step": 2403, + "task_loss": 0.16219428181648254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7634867233991217, + "compression_loss": 0.0, + "distillation_loss": 0.12786564230918884, + "epoch": 2.28, + "learning_rate": 3.801314262861822e-05, + "loss": 0.1255, + "step": 2404, + "task_loss": 0.10410147160291672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7635049133406342, + "compression_loss": 0.0, + "distillation_loss": 0.23157238960266113, + "epoch": 2.28, + "learning_rate": 3.800404350799961e-05, + "loss": 0.2361, + "step": 2405, + "task_loss": 0.27698665857315063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7635230972399806, + "compression_loss": 0.0, + "distillation_loss": 0.06388011574745178, + "epoch": 2.28, + "learning_rate": 3.799494202513386e-05, + "loss": 0.0808, + "step": 2406, + "task_loss": 0.23331348598003387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7635412750981644, + "compression_loss": 0.0, + "distillation_loss": 0.11542148143053055, + "epoch": 2.29, + "learning_rate": 3.798583818167432e-05, + "loss": 0.1163, + "step": 2407, + "task_loss": 0.12456656247377396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7635594469161894, + "compression_loss": 0.0, + "distillation_loss": 0.08499371260404587, + "epoch": 2.29, + "learning_rate": 3.797673197927473e-05, + "loss": 0.0945, + "step": 2408, + "task_loss": 0.17979812622070312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7635776126950591, + "compression_loss": 0.0, + "distillation_loss": 0.1530931293964386, + "epoch": 2.29, + "learning_rate": 3.796762341958927e-05, + "loss": 0.1582, + "step": 2409, + "task_loss": 0.20382657647132874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7635957724357775, + "compression_loss": 0.0, + "distillation_loss": 0.07288970053195953, + "epoch": 2.29, + "learning_rate": 3.795851250427257e-05, + "loss": 0.074, + "step": 2410, + "task_loss": 0.08438847959041595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.763613926139348, + "compression_loss": 0.0, + "distillation_loss": 0.23029862344264984, + "epoch": 2.29, + "learning_rate": 3.794939923497967e-05, + "loss": 0.2204, + "step": 2411, + "task_loss": 0.13161104917526245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7636320738067744, + "compression_loss": 0.0, + "distillation_loss": 0.2242552489042282, + "epoch": 2.29, + "learning_rate": 3.794028361336603e-05, + "loss": 0.2215, + "step": 2412, + "task_loss": 0.1969766765832901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7636502154390605, + "compression_loss": 0.0, + "distillation_loss": 0.13051095604896545, + "epoch": 2.29, + "learning_rate": 3.793116564108754e-05, + "loss": 0.122, + "step": 2413, + "task_loss": 0.04499632865190506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7636683510372098, + "compression_loss": 0.0, + "distillation_loss": 0.03876876085996628, + "epoch": 2.29, + "learning_rate": 3.7922045319800545e-05, + "loss": 0.0358, + "step": 2414, + "task_loss": 0.008744785562157631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.763686480602226, + "compression_loss": 0.0, + "distillation_loss": 0.05910433828830719, + "epoch": 2.29, + "learning_rate": 3.7912922651161783e-05, + "loss": 0.0699, + "step": 2415, + "task_loss": 0.16721457242965698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7637046041351129, + "compression_loss": 0.0, + "distillation_loss": 0.037029679864645004, + "epoch": 2.29, + "learning_rate": 3.790379763682844e-05, + "loss": 0.0346, + "step": 2416, + "task_loss": 0.01284867525100708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7637227216368739, + "compression_loss": 0.0, + "distillation_loss": 0.25016137957572937, + "epoch": 2.3, + "learning_rate": 3.7894670278458096e-05, + "loss": 0.2403, + "step": 2417, + "task_loss": 0.15121634304523468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7637408331085132, + "compression_loss": 0.0, + "distillation_loss": 0.033061422407627106, + "epoch": 2.3, + "learning_rate": 3.7885540577708804e-05, + "loss": 0.036, + "step": 2418, + "task_loss": 0.06259030848741531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7637589385510339, + "compression_loss": 0.0, + "distillation_loss": 0.027268044650554657, + "epoch": 2.3, + "learning_rate": 3.7876408536239006e-05, + "loss": 0.0253, + "step": 2419, + "task_loss": 0.007266120985150337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76377703796544, + "compression_loss": 0.0, + "distillation_loss": 0.14146488904953003, + "epoch": 2.3, + "learning_rate": 3.7867274155707585e-05, + "loss": 0.1379, + "step": 2420, + "task_loss": 0.10578147321939468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7637951313527352, + "compression_loss": 0.0, + "distillation_loss": 0.25725674629211426, + "epoch": 2.3, + "learning_rate": 3.7858137437773845e-05, + "loss": 0.2562, + "step": 2421, + "task_loss": 0.2463308572769165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.763813218713923, + "compression_loss": 0.0, + "distillation_loss": 0.15272045135498047, + "epoch": 2.3, + "learning_rate": 3.784899838409751e-05, + "loss": 0.1586, + "step": 2422, + "task_loss": 0.2110539674758911 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7638313000500074, + "compression_loss": 0.0, + "distillation_loss": 0.040354013442993164, + "epoch": 2.3, + "learning_rate": 3.783985699633874e-05, + "loss": 0.0399, + "step": 2423, + "task_loss": 0.03630523383617401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7638493753619917, + "compression_loss": 0.0, + "distillation_loss": 0.1408417522907257, + "epoch": 2.3, + "learning_rate": 3.783071327615811e-05, + "loss": 0.1307, + "step": 2424, + "task_loss": 0.03991064056754112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7638674446508799, + "compression_loss": 0.0, + "distillation_loss": 0.0335991308093071, + "epoch": 2.3, + "learning_rate": 3.7821567225216615e-05, + "loss": 0.0307, + "step": 2425, + "task_loss": 0.004980664700269699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7638855079176754, + "compression_loss": 0.0, + "distillation_loss": 0.13876627385616302, + "epoch": 2.3, + "learning_rate": 3.781241884517569e-05, + "loss": 0.1474, + "step": 2426, + "task_loss": 0.2252703756093979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7639035651633821, + "compression_loss": 0.0, + "distillation_loss": 0.18450605869293213, + "epoch": 2.3, + "learning_rate": 3.780326813769717e-05, + "loss": 0.1689, + "step": 2427, + "task_loss": 0.028058160096406937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7639216163890036, + "compression_loss": 0.0, + "distillation_loss": 0.42347198724746704, + "epoch": 2.31, + "learning_rate": 3.779411510444334e-05, + "loss": 0.4095, + "step": 2428, + "task_loss": 0.2832689583301544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7639396615955436, + "compression_loss": 0.0, + "distillation_loss": 0.09357698261737823, + "epoch": 2.31, + "learning_rate": 3.778495974707688e-05, + "loss": 0.0918, + "step": 2429, + "task_loss": 0.07574363052845001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7639577007840057, + "compression_loss": 0.0, + "distillation_loss": 0.15994183719158173, + "epoch": 2.31, + "learning_rate": 3.7775802067260905e-05, + "loss": 0.1569, + "step": 2430, + "task_loss": 0.1296941488981247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7639757339553938, + "compression_loss": 0.0, + "distillation_loss": 0.078113853931427, + "epoch": 2.31, + "learning_rate": 3.776664206665896e-05, + "loss": 0.0896, + "step": 2431, + "task_loss": 0.19323307275772095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7639937611107113, + "compression_loss": 0.0, + "distillation_loss": 0.028910305351018906, + "epoch": 2.31, + "learning_rate": 3.7757479746935e-05, + "loss": 0.0324, + "step": 2432, + "task_loss": 0.06403736025094986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.764011782250962, + "compression_loss": 0.0, + "distillation_loss": 0.03515395522117615, + "epoch": 2.31, + "learning_rate": 3.77483151097534e-05, + "loss": 0.0325, + "step": 2433, + "task_loss": 0.008116895332932472 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7640297973771497, + "compression_loss": 0.0, + "distillation_loss": 0.07505205273628235, + "epoch": 2.31, + "learning_rate": 3.773914815677897e-05, + "loss": 0.0719, + "step": 2434, + "task_loss": 0.04383193701505661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7640478064902779, + "compression_loss": 0.0, + "distillation_loss": 0.204584002494812, + "epoch": 2.31, + "learning_rate": 3.7729978889676915e-05, + "loss": 0.1907, + "step": 2435, + "task_loss": 0.06581425666809082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7640658095913504, + "compression_loss": 0.0, + "distillation_loss": 0.1545599400997162, + "epoch": 2.31, + "learning_rate": 3.7720807310112896e-05, + "loss": 0.1583, + "step": 2436, + "task_loss": 0.19169500470161438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7640838066813709, + "compression_loss": 0.0, + "distillation_loss": 0.10692571848630905, + "epoch": 2.31, + "learning_rate": 3.7711633419752954e-05, + "loss": 0.1141, + "step": 2437, + "task_loss": 0.17911353707313538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7641017977613429, + "compression_loss": 0.0, + "distillation_loss": 0.2065647542476654, + "epoch": 2.32, + "learning_rate": 3.7702457220263595e-05, + "loss": 0.206, + "step": 2438, + "task_loss": 0.20088602602481842 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7641197828322703, + "compression_loss": 0.0, + "distillation_loss": 0.1430378258228302, + "epoch": 2.32, + "learning_rate": 3.76932787133117e-05, + "loss": 0.1612, + "step": 2439, + "task_loss": 0.3245554566383362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7641377618951566, + "compression_loss": 0.0, + "distillation_loss": 0.12717294692993164, + "epoch": 2.32, + "learning_rate": 3.768409790056459e-05, + "loss": 0.1219, + "step": 2440, + "task_loss": 0.07445275783538818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7641557349510057, + "compression_loss": 0.0, + "distillation_loss": 0.06758947670459747, + "epoch": 2.32, + "learning_rate": 3.7674914783690006e-05, + "loss": 0.0633, + "step": 2441, + "task_loss": 0.024319326505064964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7641737020008211, + "compression_loss": 0.0, + "distillation_loss": 0.0334925502538681, + "epoch": 2.32, + "learning_rate": 3.7665729364356115e-05, + "loss": 0.0313, + "step": 2442, + "task_loss": 0.011463357135653496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7641916630456065, + "compression_loss": 0.0, + "distillation_loss": 0.112037792801857, + "epoch": 2.32, + "learning_rate": 3.7656541644231494e-05, + "loss": 0.1136, + "step": 2443, + "task_loss": 0.12805123627185822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7642096180863657, + "compression_loss": 0.0, + "distillation_loss": 0.08280123025178909, + "epoch": 2.32, + "learning_rate": 3.764735162498512e-05, + "loss": 0.0867, + "step": 2444, + "task_loss": 0.1218644231557846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7642275671241022, + "compression_loss": 0.0, + "distillation_loss": 0.16756752133369446, + "epoch": 2.32, + "learning_rate": 3.763815930828641e-05, + "loss": 0.1726, + "step": 2445, + "task_loss": 0.21819201111793518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7642455101598198, + "compression_loss": 0.0, + "distillation_loss": 0.18085908889770508, + "epoch": 2.32, + "learning_rate": 3.76289646958052e-05, + "loss": 0.1723, + "step": 2446, + "task_loss": 0.09551871567964554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7642634471945222, + "compression_loss": 0.0, + "distillation_loss": 0.11347385495901108, + "epoch": 2.32, + "learning_rate": 3.761976778921173e-05, + "loss": 0.1107, + "step": 2447, + "task_loss": 0.08610384166240692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.764281378229213, + "compression_loss": 0.0, + "distillation_loss": 0.029696688055992126, + "epoch": 2.32, + "learning_rate": 3.761056859017667e-05, + "loss": 0.0276, + "step": 2448, + "task_loss": 0.009167637676000595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7642993032648961, + "compression_loss": 0.0, + "distillation_loss": 0.1404319703578949, + "epoch": 2.33, + "learning_rate": 3.7601367100371085e-05, + "loss": 0.1482, + "step": 2449, + "task_loss": 0.21834945678710938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7643172223025749, + "compression_loss": 0.0, + "distillation_loss": 0.15198810398578644, + "epoch": 2.33, + "learning_rate": 3.759216332146649e-05, + "loss": 0.1448, + "step": 2450, + "task_loss": 0.07967476546764374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7643351353432531, + "compression_loss": 0.0, + "distillation_loss": 0.0767270177602768, + "epoch": 2.33, + "learning_rate": 3.7582957255134765e-05, + "loss": 0.0727, + "step": 2451, + "task_loss": 0.03611510992050171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7643530423879347, + "compression_loss": 0.0, + "distillation_loss": 0.06880663335323334, + "epoch": 2.33, + "learning_rate": 3.7573748903048266e-05, + "loss": 0.0728, + "step": 2452, + "task_loss": 0.10889902710914612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7643709434376229, + "compression_loss": 0.0, + "distillation_loss": 0.11665643751621246, + "epoch": 2.33, + "learning_rate": 3.756453826687972e-05, + "loss": 0.1273, + "step": 2453, + "task_loss": 0.2232402116060257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7643888384933218, + "compression_loss": 0.0, + "distillation_loss": 0.11299058794975281, + "epoch": 2.33, + "learning_rate": 3.755532534830229e-05, + "loss": 0.1148, + "step": 2454, + "task_loss": 0.13139456510543823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7644067275560349, + "compression_loss": 0.0, + "distillation_loss": 0.14656969904899597, + "epoch": 2.33, + "learning_rate": 3.7546110148989535e-05, + "loss": 0.1399, + "step": 2455, + "task_loss": 0.07960043847560883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7644246106267659, + "compression_loss": 0.0, + "distillation_loss": 0.09302163124084473, + "epoch": 2.33, + "learning_rate": 3.7536892670615454e-05, + "loss": 0.0898, + "step": 2456, + "task_loss": 0.06115756928920746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7644424877065185, + "compression_loss": 0.0, + "distillation_loss": 0.27814599871635437, + "epoch": 2.33, + "learning_rate": 3.752767291485444e-05, + "loss": 0.29, + "step": 2457, + "task_loss": 0.3964260220527649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7644603587962964, + "compression_loss": 0.0, + "distillation_loss": 0.13266442716121674, + "epoch": 2.33, + "learning_rate": 3.7518450883381306e-05, + "loss": 0.1348, + "step": 2458, + "task_loss": 0.15446260571479797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7644782238971031, + "compression_loss": 0.0, + "distillation_loss": 0.24767599999904633, + "epoch": 2.34, + "learning_rate": 3.750922657787128e-05, + "loss": 0.2381, + "step": 2459, + "task_loss": 0.15177518129348755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7644960830099425, + "compression_loss": 0.0, + "distillation_loss": 0.044840142130851746, + "epoch": 2.34, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.051, + "step": 2460, + "task_loss": 0.10653319954872131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7645139361358183, + "compression_loss": 0.0, + "distillation_loss": 0.039941057562828064, + "epoch": 2.34, + "learning_rate": 3.7490771151443525e-05, + "loss": 0.0537, + "step": 2461, + "task_loss": 0.17739588022232056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.764531783275734, + "compression_loss": 0.0, + "distillation_loss": 0.03993482142686844, + "epoch": 2.34, + "learning_rate": 3.748154003387831e-05, + "loss": 0.0367, + "step": 2462, + "task_loss": 0.00717165507376194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7645496244306934, + "compression_loss": 0.0, + "distillation_loss": 0.12393603473901749, + "epoch": 2.34, + "learning_rate": 3.7472306648981235e-05, + "loss": 0.1282, + "step": 2463, + "task_loss": 0.16707316040992737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7645674596017003, + "compression_loss": 0.0, + "distillation_loss": 0.07649366557598114, + "epoch": 2.34, + "learning_rate": 3.746307099842959e-05, + "loss": 0.0711, + "step": 2464, + "task_loss": 0.022800806909799576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7645852887897581, + "compression_loss": 0.0, + "distillation_loss": 0.20021630823612213, + "epoch": 2.34, + "learning_rate": 3.745383308390108e-05, + "loss": 0.2007, + "step": 2465, + "task_loss": 0.20535027980804443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7646031119958706, + "compression_loss": 0.0, + "distillation_loss": 0.2035316526889801, + "epoch": 2.34, + "learning_rate": 3.74445929070738e-05, + "loss": 0.196, + "step": 2466, + "task_loss": 0.1283217966556549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7646209292210415, + "compression_loss": 0.0, + "distillation_loss": 0.06502929329872131, + "epoch": 2.34, + "learning_rate": 3.74353504696263e-05, + "loss": 0.0669, + "step": 2467, + "task_loss": 0.08407002687454224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7646387404662746, + "compression_loss": 0.0, + "distillation_loss": 0.2670043408870697, + "epoch": 2.34, + "learning_rate": 3.742610577323749e-05, + "loss": 0.275, + "step": 2468, + "task_loss": 0.34709686040878296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7646565457325735, + "compression_loss": 0.0, + "distillation_loss": 0.06426650285720825, + "epoch": 2.34, + "learning_rate": 3.7416858819586724e-05, + "loss": 0.0684, + "step": 2469, + "task_loss": 0.10525249689817429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7646743450209417, + "compression_loss": 0.0, + "distillation_loss": 0.15390363335609436, + "epoch": 2.35, + "learning_rate": 3.740760961035375e-05, + "loss": 0.1644, + "step": 2470, + "task_loss": 0.258728951215744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7646921383323831, + "compression_loss": 0.0, + "distillation_loss": 0.040257424116134644, + "epoch": 2.35, + "learning_rate": 3.739835814721874e-05, + "loss": 0.037, + "step": 2471, + "task_loss": 0.0077125392854213715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7647099256679013, + "compression_loss": 0.0, + "distillation_loss": 0.12947189807891846, + "epoch": 2.35, + "learning_rate": 3.738910443186226e-05, + "loss": 0.1284, + "step": 2472, + "task_loss": 0.1188727617263794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7647277070285, + "compression_loss": 0.0, + "distillation_loss": 0.16135096549987793, + "epoch": 2.35, + "learning_rate": 3.737984846596528e-05, + "loss": 0.1543, + "step": 2473, + "task_loss": 0.09042131900787354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7647454824151829, + "compression_loss": 0.0, + "distillation_loss": 0.02904283069074154, + "epoch": 2.35, + "learning_rate": 3.737059025120922e-05, + "loss": 0.0267, + "step": 2474, + "task_loss": 0.005899908021092415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7647632518289538, + "compression_loss": 0.0, + "distillation_loss": 0.11024047434329987, + "epoch": 2.35, + "learning_rate": 3.7361329789275855e-05, + "loss": 0.1193, + "step": 2475, + "task_loss": 0.20116351544857025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7647810152708161, + "compression_loss": 0.0, + "distillation_loss": 0.04543491452932358, + "epoch": 2.35, + "learning_rate": 3.7352067081847405e-05, + "loss": 0.042, + "step": 2476, + "task_loss": 0.010787051171064377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7647987727417737, + "compression_loss": 0.0, + "distillation_loss": 0.09550483524799347, + "epoch": 2.35, + "learning_rate": 3.734280213060649e-05, + "loss": 0.1008, + "step": 2477, + "task_loss": 0.1483430564403534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7648165242428301, + "compression_loss": 0.0, + "distillation_loss": 0.08557014912366867, + "epoch": 2.35, + "learning_rate": 3.7333534937236105e-05, + "loss": 0.0859, + "step": 2478, + "task_loss": 0.08859314769506454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7648342697749891, + "compression_loss": 0.0, + "distillation_loss": 0.12534424662590027, + "epoch": 2.35, + "learning_rate": 3.7324265503419716e-05, + "loss": 0.123, + "step": 2479, + "task_loss": 0.10213040560483932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7648520093392545, + "compression_loss": 0.0, + "distillation_loss": 0.09746745228767395, + "epoch": 2.36, + "learning_rate": 3.731499383084114e-05, + "loss": 0.1161, + "step": 2480, + "task_loss": 0.2837420403957367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7648697429366297, + "compression_loss": 0.0, + "distillation_loss": 0.09911562502384186, + "epoch": 2.36, + "learning_rate": 3.730571992118462e-05, + "loss": 0.1044, + "step": 2481, + "task_loss": 0.1517096608877182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7648874705681187, + "compression_loss": 0.0, + "distillation_loss": 0.04106239229440689, + "epoch": 2.36, + "learning_rate": 3.7296443776134814e-05, + "loss": 0.046, + "step": 2482, + "task_loss": 0.0909072607755661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7649051922347249, + "compression_loss": 0.0, + "distillation_loss": 0.16157306730747223, + "epoch": 2.36, + "learning_rate": 3.7287165397376775e-05, + "loss": 0.17, + "step": 2483, + "task_loss": 0.2461170256137848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7649229079374521, + "compression_loss": 0.0, + "distillation_loss": 0.034094762057065964, + "epoch": 2.36, + "learning_rate": 3.727788478659597e-05, + "loss": 0.0376, + "step": 2484, + "task_loss": 0.06928237527608871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7649406176773041, + "compression_loss": 0.0, + "distillation_loss": 0.06985623389482498, + "epoch": 2.36, + "learning_rate": 3.726860194547826e-05, + "loss": 0.0685, + "step": 2485, + "task_loss": 0.05666497349739075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7649583214552844, + "compression_loss": 0.0, + "distillation_loss": 0.07079368084669113, + "epoch": 2.36, + "learning_rate": 3.725931687570992e-05, + "loss": 0.0717, + "step": 2486, + "task_loss": 0.07995907217264175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7649760192723968, + "compression_loss": 0.0, + "distillation_loss": 0.056753143668174744, + "epoch": 2.36, + "learning_rate": 3.7250029578977625e-05, + "loss": 0.056, + "step": 2487, + "task_loss": 0.04943781718611717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7649937111296448, + "compression_loss": 0.0, + "distillation_loss": 0.08018404990434647, + "epoch": 2.36, + "learning_rate": 3.724074005696847e-05, + "loss": 0.0841, + "step": 2488, + "task_loss": 0.11959824711084366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7650113970280323, + "compression_loss": 0.0, + "distillation_loss": 0.17123107612133026, + "epoch": 2.36, + "learning_rate": 3.723144831136992e-05, + "loss": 0.1641, + "step": 2489, + "task_loss": 0.10034439712762833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7650290769685629, + "compression_loss": 0.0, + "distillation_loss": 0.06683574616909027, + "epoch": 2.36, + "learning_rate": 3.722215434386988e-05, + "loss": 0.0654, + "step": 2490, + "task_loss": 0.05212263762950897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7650467509522403, + "compression_loss": 0.0, + "distillation_loss": 0.015314219519495964, + "epoch": 2.37, + "learning_rate": 3.721285815615665e-05, + "loss": 0.0142, + "step": 2491, + "task_loss": 0.003950970247387886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.765064418980068, + "compression_loss": 0.0, + "distillation_loss": 0.04659897834062576, + "epoch": 2.37, + "learning_rate": 3.7203559749918904e-05, + "loss": 0.0624, + "step": 2492, + "task_loss": 0.20480214059352875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7650820810530501, + "compression_loss": 0.0, + "distillation_loss": 0.12938767671585083, + "epoch": 2.37, + "learning_rate": 3.7194259126845764e-05, + "loss": 0.1231, + "step": 2493, + "task_loss": 0.0663764700293541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7650997371721899, + "compression_loss": 0.0, + "distillation_loss": 0.07641440629959106, + "epoch": 2.37, + "learning_rate": 3.7184956288626724e-05, + "loss": 0.0879, + "step": 2494, + "task_loss": 0.1917191445827484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7651173873384912, + "compression_loss": 0.0, + "distillation_loss": 0.08198924362659454, + "epoch": 2.37, + "learning_rate": 3.71756512369517e-05, + "loss": 0.0815, + "step": 2495, + "task_loss": 0.077280193567276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7651350315529577, + "compression_loss": 0.0, + "distillation_loss": 0.15662863850593567, + "epoch": 2.37, + "learning_rate": 3.716634397351097e-05, + "loss": 0.1554, + "step": 2496, + "task_loss": 0.14391350746154785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.765152669816593, + "compression_loss": 0.0, + "distillation_loss": 0.0537070631980896, + "epoch": 2.37, + "learning_rate": 3.715703449999528e-05, + "loss": 0.0495, + "step": 2497, + "task_loss": 0.011994145810604095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7651703021304009, + "compression_loss": 0.0, + "distillation_loss": 0.20824488997459412, + "epoch": 2.37, + "learning_rate": 3.7147722818095724e-05, + "loss": 0.2049, + "step": 2498, + "task_loss": 0.17516371607780457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7651879284953851, + "compression_loss": 0.0, + "distillation_loss": 0.0815531462430954, + "epoch": 2.37, + "learning_rate": 3.713840892950381e-05, + "loss": 0.089, + "step": 2499, + "task_loss": 0.15595076978206635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7652055489125491, + "compression_loss": 0.0, + "distillation_loss": 0.04428771510720253, + "epoch": 2.37, + "learning_rate": 3.712909283591145e-05, + "loss": 0.0555, + "step": 2500, + "task_loss": 0.15677018463611603 + }, + { + "epoch": 2.37, + "eval_accuracy": 0.8864678899082569, + "eval_loss": 0.4365707039833069, + "eval_runtime": 18.2037, + "eval_samples_per_second": 47.902, + "eval_steps_per_second": 5.988, + "step": 2500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7652231633828968, + "compression_loss": 0.0, + "distillation_loss": 0.03173205256462097, + "epoch": 2.38, + "learning_rate": 3.7119774539010967e-05, + "loss": 0.0396, + "step": 2501, + "task_loss": 0.10992399603128433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7652407719074318, + "compression_loss": 0.0, + "distillation_loss": 0.03812164068222046, + "epoch": 2.38, + "learning_rate": 3.711045404049507e-05, + "loss": 0.0511, + "step": 2502, + "task_loss": 0.16801124811172485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7652583744871576, + "compression_loss": 0.0, + "distillation_loss": 0.054551735520362854, + "epoch": 2.38, + "learning_rate": 3.710113134205687e-05, + "loss": 0.0503, + "step": 2503, + "task_loss": 0.011674726381897926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7652759711230782, + "compression_loss": 0.0, + "distillation_loss": 0.057191766798496246, + "epoch": 2.38, + "learning_rate": 3.709180644538988e-05, + "loss": 0.0556, + "step": 2504, + "task_loss": 0.04153808206319809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7652935618161971, + "compression_loss": 0.0, + "distillation_loss": 0.07190201431512833, + "epoch": 2.38, + "learning_rate": 3.708247935218802e-05, + "loss": 0.0705, + "step": 2505, + "task_loss": 0.05747076869010925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.765311146567518, + "compression_loss": 0.0, + "distillation_loss": 0.0554814338684082, + "epoch": 2.38, + "learning_rate": 3.707315006414559e-05, + "loss": 0.0563, + "step": 2506, + "task_loss": 0.06399840116500854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7653287253780445, + "compression_loss": 0.0, + "distillation_loss": 0.14144667983055115, + "epoch": 2.38, + "learning_rate": 3.706381858295731e-05, + "loss": 0.1652, + "step": 2507, + "task_loss": 0.37935495376586914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7653462982487805, + "compression_loss": 0.0, + "distillation_loss": 0.19897310435771942, + "epoch": 2.38, + "learning_rate": 3.705448491031829e-05, + "loss": 0.2037, + "step": 2508, + "task_loss": 0.24580544233322144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7653638651807295, + "compression_loss": 0.0, + "distillation_loss": 0.03850369527935982, + "epoch": 2.38, + "learning_rate": 3.7045149047924016e-05, + "loss": 0.0414, + "step": 2509, + "task_loss": 0.06746485829353333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7653814261748952, + "compression_loss": 0.0, + "distillation_loss": 0.01753471978008747, + "epoch": 2.38, + "learning_rate": 3.703581099747041e-05, + "loss": 0.0161, + "step": 2510, + "task_loss": 0.002795502543449402 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7653989812322815, + "compression_loss": 0.0, + "distillation_loss": 0.1509072184562683, + "epoch": 2.38, + "learning_rate": 3.702647076065378e-05, + "loss": 0.1567, + "step": 2511, + "task_loss": 0.2087806612253189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7654165303538917, + "compression_loss": 0.0, + "distillation_loss": 0.19133007526397705, + "epoch": 2.39, + "learning_rate": 3.701712833917082e-05, + "loss": 0.1858, + "step": 2512, + "task_loss": 0.13650889694690704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7654340735407298, + "compression_loss": 0.0, + "distillation_loss": 0.11685886234045029, + "epoch": 2.39, + "learning_rate": 3.700778373471861e-05, + "loss": 0.1192, + "step": 2513, + "task_loss": 0.1399974673986435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7654516107937992, + "compression_loss": 0.0, + "distillation_loss": 0.04263558238744736, + "epoch": 2.39, + "learning_rate": 3.699843694899467e-05, + "loss": 0.0548, + "step": 2514, + "task_loss": 0.16461949050426483 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7654691421141039, + "compression_loss": 0.0, + "distillation_loss": 0.13114984333515167, + "epoch": 2.39, + "learning_rate": 3.698908798369686e-05, + "loss": 0.1309, + "step": 2515, + "task_loss": 0.12871742248535156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7654866675026474, + "compression_loss": 0.0, + "distillation_loss": 0.08641094714403152, + "epoch": 2.39, + "learning_rate": 3.697973684052347e-05, + "loss": 0.0799, + "step": 2516, + "task_loss": 0.021780574694275856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7655041869604334, + "compression_loss": 0.0, + "distillation_loss": 0.05185171589255333, + "epoch": 2.39, + "learning_rate": 3.697038352117321e-05, + "loss": 0.055, + "step": 2517, + "task_loss": 0.08340021222829819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7655217004884656, + "compression_loss": 0.0, + "distillation_loss": 0.0397905595600605, + "epoch": 2.39, + "learning_rate": 3.6961028027345114e-05, + "loss": 0.057, + "step": 2518, + "task_loss": 0.21199220418930054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7655392080877477, + "compression_loss": 0.0, + "distillation_loss": 0.08754666894674301, + "epoch": 2.39, + "learning_rate": 3.695167036073868e-05, + "loss": 0.0948, + "step": 2519, + "task_loss": 0.1603783816099167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7655567097592834, + "compression_loss": 0.0, + "distillation_loss": 0.049034878611564636, + "epoch": 2.39, + "learning_rate": 3.694231052305376e-05, + "loss": 0.0531, + "step": 2520, + "task_loss": 0.09004518389701843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7655742055040762, + "compression_loss": 0.0, + "distillation_loss": 0.17068904638290405, + "epoch": 2.39, + "learning_rate": 3.693294851599063e-05, + "loss": 0.162, + "step": 2521, + "task_loss": 0.08347401022911072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76559169532313, + "compression_loss": 0.0, + "distillation_loss": 0.2087569534778595, + "epoch": 2.4, + "learning_rate": 3.692358434124992e-05, + "loss": 0.2005, + "step": 2522, + "task_loss": 0.12605254352092743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7656091792174484, + "compression_loss": 0.0, + "distillation_loss": 0.18309544026851654, + "epoch": 2.4, + "learning_rate": 3.69142180005327e-05, + "loss": 0.1849, + "step": 2523, + "task_loss": 0.20159180462360382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7656266571880351, + "compression_loss": 0.0, + "distillation_loss": 0.09189262241125107, + "epoch": 2.4, + "learning_rate": 3.69048494955404e-05, + "loss": 0.0892, + "step": 2524, + "task_loss": 0.06457659602165222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7656441292358938, + "compression_loss": 0.0, + "distillation_loss": 0.06329502165317535, + "epoch": 2.4, + "learning_rate": 3.689547882797485e-05, + "loss": 0.0645, + "step": 2525, + "task_loss": 0.07518291473388672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.765661595362028, + "compression_loss": 0.0, + "distillation_loss": 0.04618072509765625, + "epoch": 2.4, + "learning_rate": 3.688610599953828e-05, + "loss": 0.0424, + "step": 2526, + "task_loss": 0.008488666266202927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7656790555674418, + "compression_loss": 0.0, + "distillation_loss": 0.1281333714723587, + "epoch": 2.4, + "learning_rate": 3.6876731011933316e-05, + "loss": 0.1215, + "step": 2527, + "task_loss": 0.06217958778142929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7656965098531384, + "compression_loss": 0.0, + "distillation_loss": 0.10788536071777344, + "epoch": 2.4, + "learning_rate": 3.686735386686296e-05, + "loss": 0.1122, + "step": 2528, + "task_loss": 0.15063226222991943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7657139582201218, + "compression_loss": 0.0, + "distillation_loss": 0.11169138550758362, + "epoch": 2.4, + "learning_rate": 3.685797456603062e-05, + "loss": 0.1213, + "step": 2529, + "task_loss": 0.20762555301189423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7657314006693955, + "compression_loss": 0.0, + "distillation_loss": 0.2469841092824936, + "epoch": 2.4, + "learning_rate": 3.684859311114009e-05, + "loss": 0.2493, + "step": 2530, + "task_loss": 0.2696739435195923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7657488372019633, + "compression_loss": 0.0, + "distillation_loss": 0.08042797446250916, + "epoch": 2.4, + "learning_rate": 3.6839209503895566e-05, + "loss": 0.0757, + "step": 2531, + "task_loss": 0.033532898873090744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7657662678188288, + "compression_loss": 0.0, + "distillation_loss": 0.05556613951921463, + "epoch": 2.4, + "learning_rate": 3.6829823746001616e-05, + "loss": 0.0513, + "step": 2532, + "task_loss": 0.013295382261276245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7657836925209958, + "compression_loss": 0.0, + "distillation_loss": 0.13678494095802307, + "epoch": 2.41, + "learning_rate": 3.6820435839163205e-05, + "loss": 0.1351, + "step": 2533, + "task_loss": 0.12005842477083206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7658011113094678, + "compression_loss": 0.0, + "distillation_loss": 0.07145422697067261, + "epoch": 2.41, + "learning_rate": 3.68110457850857e-05, + "loss": 0.0678, + "step": 2534, + "task_loss": 0.03514321148395538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7658185241852488, + "compression_loss": 0.0, + "distillation_loss": 0.1274898201227188, + "epoch": 2.41, + "learning_rate": 3.680165358547484e-05, + "loss": 0.1235, + "step": 2535, + "task_loss": 0.08788056671619415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7658359311493421, + "compression_loss": 0.0, + "distillation_loss": 0.17474448680877686, + "epoch": 2.41, + "learning_rate": 3.6792259242036776e-05, + "loss": 0.1785, + "step": 2536, + "task_loss": 0.21268706023693085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7658533322027515, + "compression_loss": 0.0, + "distillation_loss": 0.11320266127586365, + "epoch": 2.41, + "learning_rate": 3.678286275647802e-05, + "loss": 0.1103, + "step": 2537, + "task_loss": 0.08437247574329376 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7658707273464809, + "compression_loss": 0.0, + "distillation_loss": 0.15101607143878937, + "epoch": 2.41, + "learning_rate": 3.677346413050551e-05, + "loss": 0.15, + "step": 2538, + "task_loss": 0.14134559035301208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7658881165815338, + "compression_loss": 0.0, + "distillation_loss": 0.10815154016017914, + "epoch": 2.41, + "learning_rate": 3.6764063365826525e-05, + "loss": 0.1181, + "step": 2539, + "task_loss": 0.2076764553785324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7659054999089138, + "compression_loss": 0.0, + "distillation_loss": 0.10024487972259521, + "epoch": 2.41, + "learning_rate": 3.675466046414878e-05, + "loss": 0.1026, + "step": 2540, + "task_loss": 0.12348996847867966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7659228773296247, + "compression_loss": 0.0, + "distillation_loss": 0.033068105578422546, + "epoch": 2.41, + "learning_rate": 3.674525542718035e-05, + "loss": 0.0304, + "step": 2541, + "task_loss": 0.006272824481129646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7659402488446703, + "compression_loss": 0.0, + "distillation_loss": 0.05263807624578476, + "epoch": 2.41, + "learning_rate": 3.6735848256629705e-05, + "loss": 0.0482, + "step": 2542, + "task_loss": 0.008157419040799141 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.765957614455054, + "compression_loss": 0.0, + "distillation_loss": 0.06785233318805695, + "epoch": 2.42, + "learning_rate": 3.6726438954205714e-05, + "loss": 0.0744, + "step": 2543, + "task_loss": 0.13311488926410675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7659749741617796, + "compression_loss": 0.0, + "distillation_loss": 0.034298889338970184, + "epoch": 2.42, + "learning_rate": 3.6717027521617595e-05, + "loss": 0.0315, + "step": 2544, + "task_loss": 0.006205489858984947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7659923279658509, + "compression_loss": 0.0, + "distillation_loss": 0.19808626174926758, + "epoch": 2.42, + "learning_rate": 3.6707613960575006e-05, + "loss": 0.1921, + "step": 2545, + "task_loss": 0.13829779624938965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7660096758682715, + "compression_loss": 0.0, + "distillation_loss": 0.11868824064731598, + "epoch": 2.42, + "learning_rate": 3.669819827278795e-05, + "loss": 0.118, + "step": 2546, + "task_loss": 0.11179932951927185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766027017870045, + "compression_loss": 0.0, + "distillation_loss": 0.018186789005994797, + "epoch": 2.42, + "learning_rate": 3.668878045996685e-05, + "loss": 0.0167, + "step": 2547, + "task_loss": 0.0034119393676519394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7660443539721753, + "compression_loss": 0.0, + "distillation_loss": 0.05021512135863304, + "epoch": 2.42, + "learning_rate": 3.667936052382248e-05, + "loss": 0.0501, + "step": 2548, + "task_loss": 0.04938085749745369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7660616841756658, + "compression_loss": 0.0, + "distillation_loss": 0.1335231363773346, + "epoch": 2.42, + "learning_rate": 3.666993846606602e-05, + "loss": 0.1321, + "step": 2549, + "task_loss": 0.11888933926820755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7660790084815203, + "compression_loss": 0.0, + "distillation_loss": 0.07150552421808243, + "epoch": 2.42, + "learning_rate": 3.666051428840904e-05, + "loss": 0.0767, + "step": 2550, + "task_loss": 0.12331639230251312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7660963268907426, + "compression_loss": 0.0, + "distillation_loss": 0.3968278169631958, + "epoch": 2.42, + "learning_rate": 3.665108799256348e-05, + "loss": 0.4067, + "step": 2551, + "task_loss": 0.4955511689186096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7661136394043363, + "compression_loss": 0.0, + "distillation_loss": 0.2079869657754898, + "epoch": 2.42, + "learning_rate": 3.6641659580241665e-05, + "loss": 0.2161, + "step": 2552, + "task_loss": 0.2889349162578583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766130946023305, + "compression_loss": 0.0, + "distillation_loss": 0.21949072182178497, + "epoch": 2.42, + "learning_rate": 3.663222905315633e-05, + "loss": 0.2231, + "step": 2553, + "task_loss": 0.25531524419784546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7661482467486525, + "compression_loss": 0.0, + "distillation_loss": 0.1400509923696518, + "epoch": 2.43, + "learning_rate": 3.662279641302056e-05, + "loss": 0.1469, + "step": 2554, + "task_loss": 0.2087712436914444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7661655415813824, + "compression_loss": 0.0, + "distillation_loss": 0.029690932482481003, + "epoch": 2.43, + "learning_rate": 3.6613361661547854e-05, + "loss": 0.0333, + "step": 2555, + "task_loss": 0.06537258625030518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7661828305224985, + "compression_loss": 0.0, + "distillation_loss": 0.08491934835910797, + "epoch": 2.43, + "learning_rate": 3.660392480045206e-05, + "loss": 0.0799, + "step": 2556, + "task_loss": 0.03468838334083557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7662001135730042, + "compression_loss": 0.0, + "distillation_loss": 0.06277995556592941, + "epoch": 2.43, + "learning_rate": 3.659448583144745e-05, + "loss": 0.06, + "step": 2557, + "task_loss": 0.03468390554189682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7662173907339035, + "compression_loss": 0.0, + "distillation_loss": 0.14954762160778046, + "epoch": 2.43, + "learning_rate": 3.658504475624865e-05, + "loss": 0.1508, + "step": 2558, + "task_loss": 0.16203594207763672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7662346620061999, + "compression_loss": 0.0, + "distillation_loss": 0.08595126867294312, + "epoch": 2.43, + "learning_rate": 3.657560157657067e-05, + "loss": 0.0885, + "step": 2559, + "task_loss": 0.11119415611028671 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7662519273908973, + "compression_loss": 0.0, + "distillation_loss": 0.25560837984085083, + "epoch": 2.43, + "learning_rate": 3.656615629412892e-05, + "loss": 0.2518, + "step": 2560, + "task_loss": 0.21740317344665527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766269186888999, + "compression_loss": 0.0, + "distillation_loss": 0.03523785620927811, + "epoch": 2.43, + "learning_rate": 3.655670891063917e-05, + "loss": 0.0325, + "step": 2561, + "task_loss": 0.007899057120084763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7662864405015091, + "compression_loss": 0.0, + "distillation_loss": 0.07272191345691681, + "epoch": 2.43, + "learning_rate": 3.6547259427817595e-05, + "loss": 0.068, + "step": 2562, + "task_loss": 0.025067364796996117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7663036882294311, + "compression_loss": 0.0, + "distillation_loss": 0.06609214842319489, + "epoch": 2.43, + "learning_rate": 3.6537807847380726e-05, + "loss": 0.0712, + "step": 2563, + "task_loss": 0.11691683530807495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7663209300737686, + "compression_loss": 0.0, + "distillation_loss": 0.06222629174590111, + "epoch": 2.43, + "learning_rate": 3.65283541710455e-05, + "loss": 0.0747, + "step": 2564, + "task_loss": 0.18653015792369843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7663381660355253, + "compression_loss": 0.0, + "distillation_loss": 0.08360789716243744, + "epoch": 2.44, + "learning_rate": 3.6518898400529214e-05, + "loss": 0.0781, + "step": 2565, + "task_loss": 0.02866414003074169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7663553961157049, + "compression_loss": 0.0, + "distillation_loss": 0.044771708548069, + "epoch": 2.44, + "learning_rate": 3.650944053754956e-05, + "loss": 0.0451, + "step": 2566, + "task_loss": 0.04824347794055939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7663726203153112, + "compression_loss": 0.0, + "distillation_loss": 0.09424047917127609, + "epoch": 2.44, + "learning_rate": 3.6499980583824606e-05, + "loss": 0.0961, + "step": 2567, + "task_loss": 0.1124841496348381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7663898386353478, + "compression_loss": 0.0, + "distillation_loss": 0.04939156025648117, + "epoch": 2.44, + "learning_rate": 3.64905185410728e-05, + "loss": 0.0606, + "step": 2568, + "task_loss": 0.16101084649562836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7664070510768184, + "compression_loss": 0.0, + "distillation_loss": 0.08628442138433456, + "epoch": 2.44, + "learning_rate": 3.6481054411012946e-05, + "loss": 0.087, + "step": 2569, + "task_loss": 0.09357213228940964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7664242576407266, + "compression_loss": 0.0, + "distillation_loss": 0.06562324613332748, + "epoch": 2.44, + "learning_rate": 3.647158819536427e-05, + "loss": 0.0648, + "step": 2570, + "task_loss": 0.057407625019550323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7664414583280762, + "compression_loss": 0.0, + "distillation_loss": 0.18060490489006042, + "epoch": 2.44, + "learning_rate": 3.646211989584635e-05, + "loss": 0.1716, + "step": 2571, + "task_loss": 0.09014902263879776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7664586531398708, + "compression_loss": 0.0, + "distillation_loss": 0.072050541639328, + "epoch": 2.44, + "learning_rate": 3.645264951417915e-05, + "loss": 0.0875, + "step": 2572, + "task_loss": 0.2262389212846756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7664758420771142, + "compression_loss": 0.0, + "distillation_loss": 0.1359626054763794, + "epoch": 2.44, + "learning_rate": 3.644317705208301e-05, + "loss": 0.1305, + "step": 2573, + "task_loss": 0.08181658387184143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7664930251408099, + "compression_loss": 0.0, + "distillation_loss": 0.049975574016571045, + "epoch": 2.44, + "learning_rate": 3.643370251127865e-05, + "loss": 0.0476, + "step": 2574, + "task_loss": 0.025839539244771004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7665102023319617, + "compression_loss": 0.0, + "distillation_loss": 0.13199934363365173, + "epoch": 2.45, + "learning_rate": 3.6424225893487166e-05, + "loss": 0.1329, + "step": 2575, + "task_loss": 0.1408880352973938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7665273736515732, + "compression_loss": 0.0, + "distillation_loss": 0.036083199083805084, + "epoch": 2.45, + "learning_rate": 3.641474720043002e-05, + "loss": 0.0437, + "step": 2576, + "task_loss": 0.11259350180625916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7665445391006481, + "compression_loss": 0.0, + "distillation_loss": 0.06714160740375519, + "epoch": 2.45, + "learning_rate": 3.6405266433829075e-05, + "loss": 0.0623, + "step": 2577, + "task_loss": 0.0186399407684803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7665616986801902, + "compression_loss": 0.0, + "distillation_loss": 0.1377488076686859, + "epoch": 2.45, + "learning_rate": 3.639578359540655e-05, + "loss": 0.1438, + "step": 2578, + "task_loss": 0.19833146035671234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7665788523912032, + "compression_loss": 0.0, + "distillation_loss": 0.030588299036026, + "epoch": 2.45, + "learning_rate": 3.638629868688506e-05, + "loss": 0.0324, + "step": 2579, + "task_loss": 0.04895820468664169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7665960002346905, + "compression_loss": 0.0, + "distillation_loss": 0.022023940458893776, + "epoch": 2.45, + "learning_rate": 3.6376811709987574e-05, + "loss": 0.0295, + "step": 2580, + "task_loss": 0.09721846878528595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766613142211656, + "compression_loss": 0.0, + "distillation_loss": 0.021494712680578232, + "epoch": 2.45, + "learning_rate": 3.636732266643745e-05, + "loss": 0.028, + "step": 2581, + "task_loss": 0.0862061157822609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7666302783231035, + "compression_loss": 0.0, + "distillation_loss": 0.019449274986982346, + "epoch": 2.45, + "learning_rate": 3.635783155795841e-05, + "loss": 0.0182, + "step": 2582, + "task_loss": 0.006459413096308708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7666474085700363, + "compression_loss": 0.0, + "distillation_loss": 0.04088941961526871, + "epoch": 2.45, + "learning_rate": 3.634833838627458e-05, + "loss": 0.0504, + "step": 2583, + "task_loss": 0.13594874739646912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7666645329534585, + "compression_loss": 0.0, + "distillation_loss": 0.23599717020988464, + "epoch": 2.45, + "learning_rate": 3.6338843153110424e-05, + "loss": 0.2423, + "step": 2584, + "task_loss": 0.29853349924087524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7666816514743735, + "compression_loss": 0.0, + "distillation_loss": 0.1065201386809349, + "epoch": 2.45, + "learning_rate": 3.63293458601908e-05, + "loss": 0.1064, + "step": 2585, + "task_loss": 0.10565009713172913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7666987641337851, + "compression_loss": 0.0, + "distillation_loss": 0.08189831674098969, + "epoch": 2.46, + "learning_rate": 3.631984650924094e-05, + "loss": 0.0766, + "step": 2586, + "task_loss": 0.029174111783504486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766715870932697, + "compression_loss": 0.0, + "distillation_loss": 0.07314605265855789, + "epoch": 2.46, + "learning_rate": 3.631034510198643e-05, + "loss": 0.067, + "step": 2587, + "task_loss": 0.011908493936061859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7667329718721128, + "compression_loss": 0.0, + "distillation_loss": 0.20526961982250214, + "epoch": 2.46, + "learning_rate": 3.630084164015328e-05, + "loss": 0.2009, + "step": 2588, + "task_loss": 0.1615629643201828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7667500669530363, + "compression_loss": 0.0, + "distillation_loss": 0.02173478901386261, + "epoch": 2.46, + "learning_rate": 3.6291336125467814e-05, + "loss": 0.0284, + "step": 2589, + "task_loss": 0.08851068466901779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766767156176471, + "compression_loss": 0.0, + "distillation_loss": 0.0929790809750557, + "epoch": 2.46, + "learning_rate": 3.628182855965676e-05, + "loss": 0.0901, + "step": 2590, + "task_loss": 0.06388040632009506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7667842395434207, + "compression_loss": 0.0, + "distillation_loss": 0.09808763116598129, + "epoch": 2.46, + "learning_rate": 3.627231894444721e-05, + "loss": 0.093, + "step": 2591, + "task_loss": 0.04766422510147095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7668013170548891, + "compression_loss": 0.0, + "distillation_loss": 0.08065304160118103, + "epoch": 2.46, + "learning_rate": 3.6262807281566634e-05, + "loss": 0.0768, + "step": 2592, + "task_loss": 0.04211016744375229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7668183887118799, + "compression_loss": 0.0, + "distillation_loss": 0.09210735559463501, + "epoch": 2.46, + "learning_rate": 3.6253293572742884e-05, + "loss": 0.0841, + "step": 2593, + "task_loss": 0.011922374367713928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7668354545153966, + "compression_loss": 0.0, + "distillation_loss": 0.20635256171226501, + "epoch": 2.46, + "learning_rate": 3.624377781970416e-05, + "loss": 0.1924, + "step": 2594, + "task_loss": 0.06673353165388107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7668525144664431, + "compression_loss": 0.0, + "distillation_loss": 0.1058523878455162, + "epoch": 2.46, + "learning_rate": 3.6234260024179033e-05, + "loss": 0.1146, + "step": 2595, + "task_loss": 0.1931142807006836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.766869568566023, + "compression_loss": 0.0, + "distillation_loss": 0.029114918783307076, + "epoch": 2.47, + "learning_rate": 3.622474018789648e-05, + "loss": 0.0281, + "step": 2596, + "task_loss": 0.018587203696370125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76688661681514, + "compression_loss": 0.0, + "distillation_loss": 0.025142014026641846, + "epoch": 2.47, + "learning_rate": 3.62152183125858e-05, + "loss": 0.0326, + "step": 2597, + "task_loss": 0.09944605827331543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7669036592147978, + "compression_loss": 0.0, + "distillation_loss": 0.19486957788467407, + "epoch": 2.47, + "learning_rate": 3.620569439997671e-05, + "loss": 0.2059, + "step": 2598, + "task_loss": 0.30563467741012573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7669206957659999, + "compression_loss": 0.0, + "distillation_loss": 0.03863801434636116, + "epoch": 2.47, + "learning_rate": 3.6196168451799266e-05, + "loss": 0.0356, + "step": 2599, + "task_loss": 0.008016956970095634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7669377264697502, + "compression_loss": 0.0, + "distillation_loss": 0.13873320817947388, + "epoch": 2.47, + "learning_rate": 3.618664046978389e-05, + "loss": 0.1375, + "step": 2600, + "task_loss": 0.12616637349128723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7669547513270523, + "compression_loss": 0.0, + "distillation_loss": 0.022825509309768677, + "epoch": 2.47, + "learning_rate": 3.617711045566141e-05, + "loss": 0.0215, + "step": 2601, + "task_loss": 0.00933530181646347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76697177033891, + "compression_loss": 0.0, + "distillation_loss": 0.06487306207418442, + "epoch": 2.47, + "learning_rate": 3.616757841116298e-05, + "loss": 0.0745, + "step": 2602, + "task_loss": 0.16160070896148682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7669887835063267, + "compression_loss": 0.0, + "distillation_loss": 0.0611257366836071, + "epoch": 2.47, + "learning_rate": 3.6158044338020155e-05, + "loss": 0.0651, + "step": 2603, + "task_loss": 0.1010468602180481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7670057908303063, + "compression_loss": 0.0, + "distillation_loss": 0.028607673943042755, + "epoch": 2.47, + "learning_rate": 3.614850823796483e-05, + "loss": 0.0267, + "step": 2604, + "task_loss": 0.009802697226405144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7670227923118524, + "compression_loss": 0.0, + "distillation_loss": 0.19368760287761688, + "epoch": 2.47, + "learning_rate": 3.6138970112729296e-05, + "loss": 0.1928, + "step": 2605, + "task_loss": 0.184406578540802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7670397879519689, + "compression_loss": 0.0, + "distillation_loss": 0.06413474678993225, + "epoch": 2.47, + "learning_rate": 3.612942996404619e-05, + "loss": 0.0689, + "step": 2606, + "task_loss": 0.11217701435089111 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7670567777516591, + "compression_loss": 0.0, + "distillation_loss": 0.07466208934783936, + "epoch": 2.48, + "learning_rate": 3.611988779364853e-05, + "loss": 0.0731, + "step": 2607, + "task_loss": 0.059125810861587524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767073761711927, + "compression_loss": 0.0, + "distillation_loss": 0.034310828894376755, + "epoch": 2.48, + "learning_rate": 3.611034360326971e-05, + "loss": 0.0405, + "step": 2608, + "task_loss": 0.09609294682741165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7670907398337762, + "compression_loss": 0.0, + "distillation_loss": 0.07237622141838074, + "epoch": 2.48, + "learning_rate": 3.6100797394643455e-05, + "loss": 0.0682, + "step": 2609, + "task_loss": 0.03029775619506836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7671077121182103, + "compression_loss": 0.0, + "distillation_loss": 0.03261907026171684, + "epoch": 2.48, + "learning_rate": 3.60912491695039e-05, + "loss": 0.0302, + "step": 2610, + "task_loss": 0.008593011647462845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7671246785662329, + "compression_loss": 0.0, + "distillation_loss": 0.044936779886484146, + "epoch": 2.48, + "learning_rate": 3.608169892958551e-05, + "loss": 0.0455, + "step": 2611, + "task_loss": 0.050538793206214905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7671416391788479, + "compression_loss": 0.0, + "distillation_loss": 0.04782456159591675, + "epoch": 2.48, + "learning_rate": 3.607214667662314e-05, + "loss": 0.0502, + "step": 2612, + "task_loss": 0.07197590172290802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767158593957059, + "compression_loss": 0.0, + "distillation_loss": 0.14677798748016357, + "epoch": 2.48, + "learning_rate": 3.606259241235201e-05, + "loss": 0.1464, + "step": 2613, + "task_loss": 0.1426515430212021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7671755429018696, + "compression_loss": 0.0, + "distillation_loss": 0.08823909610509872, + "epoch": 2.48, + "learning_rate": 3.605303613850768e-05, + "loss": 0.0938, + "step": 2614, + "task_loss": 0.14365153014659882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7671924860142837, + "compression_loss": 0.0, + "distillation_loss": 0.21100696921348572, + "epoch": 2.48, + "learning_rate": 3.604347785682611e-05, + "loss": 0.2037, + "step": 2615, + "task_loss": 0.13835720717906952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7672094232953047, + "compression_loss": 0.0, + "distillation_loss": 0.12257960438728333, + "epoch": 2.48, + "learning_rate": 3.60339175690436e-05, + "loss": 0.115, + "step": 2616, + "task_loss": 0.04694103077054024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7672263547459366, + "compression_loss": 0.0, + "distillation_loss": 0.18178528547286987, + "epoch": 2.49, + "learning_rate": 3.602435527689683e-05, + "loss": 0.1834, + "step": 2617, + "task_loss": 0.198032945394516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7672432803671828, + "compression_loss": 0.0, + "distillation_loss": 0.11362110078334808, + "epoch": 2.49, + "learning_rate": 3.6014790982122816e-05, + "loss": 0.1279, + "step": 2618, + "task_loss": 0.25671568512916565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767260200160047, + "compression_loss": 0.0, + "distillation_loss": 0.10963121801614761, + "epoch": 2.49, + "learning_rate": 3.6005224686458985e-05, + "loss": 0.1102, + "step": 2619, + "task_loss": 0.11555326730012894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7672771141255331, + "compression_loss": 0.0, + "distillation_loss": 0.02279139682650566, + "epoch": 2.49, + "learning_rate": 3.599565639164308e-05, + "loss": 0.0305, + "step": 2620, + "task_loss": 0.09947670251131058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7672940222646447, + "compression_loss": 0.0, + "distillation_loss": 0.07615819573402405, + "epoch": 2.49, + "learning_rate": 3.5986086099413234e-05, + "loss": 0.0782, + "step": 2621, + "task_loss": 0.09702453017234802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7673109245783853, + "compression_loss": 0.0, + "distillation_loss": 0.09059733152389526, + "epoch": 2.49, + "learning_rate": 3.597651381150795e-05, + "loss": 0.1036, + "step": 2622, + "task_loss": 0.22066594660282135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7673278210677588, + "compression_loss": 0.0, + "distillation_loss": 0.17163929343223572, + "epoch": 2.49, + "learning_rate": 3.5966939529666056e-05, + "loss": 0.1662, + "step": 2623, + "task_loss": 0.11702217161655426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7673447117337687, + "compression_loss": 0.0, + "distillation_loss": 0.25577402114868164, + "epoch": 2.49, + "learning_rate": 3.595736325562679e-05, + "loss": 0.2671, + "step": 2624, + "task_loss": 0.36930835247039795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7673615965774189, + "compression_loss": 0.0, + "distillation_loss": 0.0822661817073822, + "epoch": 2.49, + "learning_rate": 3.5947784991129716e-05, + "loss": 0.0869, + "step": 2625, + "task_loss": 0.1290304958820343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7673784755997128, + "compression_loss": 0.0, + "distillation_loss": 0.059237804263830185, + "epoch": 2.49, + "learning_rate": 3.593820473791476e-05, + "loss": 0.0793, + "step": 2626, + "task_loss": 0.26018190383911133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7673953488016544, + "compression_loss": 0.0, + "distillation_loss": 0.31930679082870483, + "epoch": 2.49, + "learning_rate": 3.5928622497722245e-05, + "loss": 0.3108, + "step": 2627, + "task_loss": 0.2345261424779892 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7674122161842472, + "compression_loss": 0.0, + "distillation_loss": 0.12323985993862152, + "epoch": 2.5, + "learning_rate": 3.591903827229282e-05, + "loss": 0.1256, + "step": 2628, + "task_loss": 0.1471230387687683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7674290777484948, + "compression_loss": 0.0, + "distillation_loss": 0.16051419079303741, + "epoch": 2.5, + "learning_rate": 3.590945206336751e-05, + "loss": 0.1523, + "step": 2629, + "task_loss": 0.07876568287611008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767445933495401, + "compression_loss": 0.0, + "distillation_loss": 0.15569360554218292, + "epoch": 2.5, + "learning_rate": 3.5899863872687675e-05, + "loss": 0.1511, + "step": 2630, + "task_loss": 0.10985402017831802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7674627834259696, + "compression_loss": 0.0, + "distillation_loss": 0.3402307629585266, + "epoch": 2.5, + "learning_rate": 3.58902737019951e-05, + "loss": 0.3425, + "step": 2631, + "task_loss": 0.36277827620506287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767479627541204, + "compression_loss": 0.0, + "distillation_loss": 0.2227693647146225, + "epoch": 2.5, + "learning_rate": 3.5880681553031835e-05, + "loss": 0.2177, + "step": 2632, + "task_loss": 0.17254310846328735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7674964658421082, + "compression_loss": 0.0, + "distillation_loss": 0.1516626626253128, + "epoch": 2.5, + "learning_rate": 3.5871087427540375e-05, + "loss": 0.153, + "step": 2633, + "task_loss": 0.16509924829006195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7675132983296856, + "compression_loss": 0.0, + "distillation_loss": 0.06550684571266174, + "epoch": 2.5, + "learning_rate": 3.586149132726353e-05, + "loss": 0.0695, + "step": 2634, + "task_loss": 0.10565401613712311 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7675301250049401, + "compression_loss": 0.0, + "distillation_loss": 0.08065078407526016, + "epoch": 2.5, + "learning_rate": 3.585189325394447e-05, + "loss": 0.0776, + "step": 2635, + "task_loss": 0.04991026595234871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7675469458688752, + "compression_loss": 0.0, + "distillation_loss": 0.10402727127075195, + "epoch": 2.5, + "learning_rate": 3.5842293209326746e-05, + "loss": 0.1008, + "step": 2636, + "task_loss": 0.07204234600067139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7675637609224947, + "compression_loss": 0.0, + "distillation_loss": 0.1009630411863327, + "epoch": 2.5, + "learning_rate": 3.583269119515423e-05, + "loss": 0.1036, + "step": 2637, + "task_loss": 0.1269753873348236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7675805701668021, + "compression_loss": 0.0, + "distillation_loss": 0.08721812069416046, + "epoch": 2.51, + "learning_rate": 3.58230872131712e-05, + "loss": 0.0916, + "step": 2638, + "task_loss": 0.13126561045646667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7675973736028014, + "compression_loss": 0.0, + "distillation_loss": 0.06688186526298523, + "epoch": 2.51, + "learning_rate": 3.581348126512225e-05, + "loss": 0.0659, + "step": 2639, + "task_loss": 0.0573241263628006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7676141712314961, + "compression_loss": 0.0, + "distillation_loss": 0.07377175241708755, + "epoch": 2.51, + "learning_rate": 3.5803873352752343e-05, + "loss": 0.0862, + "step": 2640, + "task_loss": 0.19805538654327393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7676309630538898, + "compression_loss": 0.0, + "distillation_loss": 0.1930721551179886, + "epoch": 2.51, + "learning_rate": 3.5794263477806816e-05, + "loss": 0.185, + "step": 2641, + "task_loss": 0.1118684709072113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7676477490709863, + "compression_loss": 0.0, + "distillation_loss": 0.13114435970783234, + "epoch": 2.51, + "learning_rate": 3.578465164203134e-05, + "loss": 0.1396, + "step": 2642, + "task_loss": 0.21528376638889313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7676645292837894, + "compression_loss": 0.0, + "distillation_loss": 0.1114916130900383, + "epoch": 2.51, + "learning_rate": 3.577503784717195e-05, + "loss": 0.1063, + "step": 2643, + "task_loss": 0.05928418040275574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7676813036933025, + "compression_loss": 0.0, + "distillation_loss": 0.09665559232234955, + "epoch": 2.51, + "learning_rate": 3.576542209497505e-05, + "loss": 0.0976, + "step": 2644, + "task_loss": 0.10586561262607574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7676980723005294, + "compression_loss": 0.0, + "distillation_loss": 0.057753339409828186, + "epoch": 2.51, + "learning_rate": 3.575580438718738e-05, + "loss": 0.0596, + "step": 2645, + "task_loss": 0.07584469765424728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7677148351064739, + "compression_loss": 0.0, + "distillation_loss": 0.11892731487751007, + "epoch": 2.51, + "learning_rate": 3.574618472555604e-05, + "loss": 0.1125, + "step": 2646, + "task_loss": 0.05511043220758438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7677315921121395, + "compression_loss": 0.0, + "distillation_loss": 0.1064673513174057, + "epoch": 2.51, + "learning_rate": 3.573656311182848e-05, + "loss": 0.1096, + "step": 2647, + "task_loss": 0.13810373842716217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7677483433185299, + "compression_loss": 0.0, + "distillation_loss": 0.17461617290973663, + "epoch": 2.51, + "learning_rate": 3.5726939547752536e-05, + "loss": 0.1789, + "step": 2648, + "task_loss": 0.21704277396202087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767765088726649, + "compression_loss": 0.0, + "distillation_loss": 0.22404174506664276, + "epoch": 2.52, + "learning_rate": 3.5717314035076355e-05, + "loss": 0.216, + "step": 2649, + "task_loss": 0.14349466562271118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7677818283375003, + "compression_loss": 0.0, + "distillation_loss": 0.05500160902738571, + "epoch": 2.52, + "learning_rate": 3.570768657554847e-05, + "loss": 0.0503, + "step": 2650, + "task_loss": 0.008138328790664673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7677985621520874, + "compression_loss": 0.0, + "distillation_loss": 0.12913605570793152, + "epoch": 2.52, + "learning_rate": 3.569805717091775e-05, + "loss": 0.1416, + "step": 2651, + "task_loss": 0.2541487216949463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7678152901714143, + "compression_loss": 0.0, + "distillation_loss": 0.10944856703281403, + "epoch": 2.52, + "learning_rate": 3.5688425822933414e-05, + "loss": 0.1132, + "step": 2652, + "task_loss": 0.14650918543338776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7678320123964844, + "compression_loss": 0.0, + "distillation_loss": 0.031936414539813995, + "epoch": 2.52, + "learning_rate": 3.5678792533345055e-05, + "loss": 0.0292, + "step": 2653, + "task_loss": 0.004621490836143494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7678487288283014, + "compression_loss": 0.0, + "distillation_loss": 0.0944894552230835, + "epoch": 2.52, + "learning_rate": 3.5669157303902604e-05, + "loss": 0.0947, + "step": 2654, + "task_loss": 0.09676090627908707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767865439467869, + "compression_loss": 0.0, + "distillation_loss": 0.03909418731927872, + "epoch": 2.52, + "learning_rate": 3.565952013635635e-05, + "loss": 0.0408, + "step": 2655, + "task_loss": 0.05638463795185089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767882144316191, + "compression_loss": 0.0, + "distillation_loss": 0.06495656073093414, + "epoch": 2.52, + "learning_rate": 3.564988103245692e-05, + "loss": 0.0731, + "step": 2656, + "task_loss": 0.14613646268844604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.767898843374271, + "compression_loss": 0.0, + "distillation_loss": 0.09904111921787262, + "epoch": 2.52, + "learning_rate": 3.564023999395531e-05, + "loss": 0.1154, + "step": 2657, + "task_loss": 0.26308944821357727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7679155366431126, + "compression_loss": 0.0, + "distillation_loss": 0.13033923506736755, + "epoch": 2.52, + "learning_rate": 3.563059702260287e-05, + "loss": 0.1402, + "step": 2658, + "task_loss": 0.22941374778747559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7679322241237196, + "compression_loss": 0.0, + "distillation_loss": 0.038218479603528976, + "epoch": 2.53, + "learning_rate": 3.562095212015128e-05, + "loss": 0.0439, + "step": 2659, + "task_loss": 0.09546901285648346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7679489058170957, + "compression_loss": 0.0, + "distillation_loss": 0.06592346727848053, + "epoch": 2.53, + "learning_rate": 3.5611305288352576e-05, + "loss": 0.0639, + "step": 2660, + "task_loss": 0.04572898894548416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7679655817242446, + "compression_loss": 0.0, + "distillation_loss": 0.1503419280052185, + "epoch": 2.53, + "learning_rate": 3.560165652895917e-05, + "loss": 0.1453, + "step": 2661, + "task_loss": 0.09945236146450043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7679822518461699, + "compression_loss": 0.0, + "distillation_loss": 0.03605799376964569, + "epoch": 2.53, + "learning_rate": 3.5592005843723794e-05, + "loss": 0.0407, + "step": 2662, + "task_loss": 0.08254842460155487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7679989161838752, + "compression_loss": 0.0, + "distillation_loss": 0.06412842869758606, + "epoch": 2.53, + "learning_rate": 3.558235323439955e-05, + "loss": 0.0721, + "step": 2663, + "task_loss": 0.14363062381744385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7680155747383642, + "compression_loss": 0.0, + "distillation_loss": 0.045804090797901154, + "epoch": 2.53, + "learning_rate": 3.557269870273987e-05, + "loss": 0.0485, + "step": 2664, + "task_loss": 0.07302069664001465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7680322275106408, + "compression_loss": 0.0, + "distillation_loss": 0.1284482777118683, + "epoch": 2.53, + "learning_rate": 3.5563042250498556e-05, + "loss": 0.131, + "step": 2665, + "task_loss": 0.1538638472557068 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7680488745017086, + "compression_loss": 0.0, + "distillation_loss": 0.037929221987724304, + "epoch": 2.53, + "learning_rate": 3.555338387942974e-05, + "loss": 0.0348, + "step": 2666, + "task_loss": 0.0066341981291770935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7680655157125711, + "compression_loss": 0.0, + "distillation_loss": 0.04109591618180275, + "epoch": 2.53, + "learning_rate": 3.5543723591287916e-05, + "loss": 0.0568, + "step": 2667, + "task_loss": 0.19810107350349426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7680821511442322, + "compression_loss": 0.0, + "distillation_loss": 0.03841260448098183, + "epoch": 2.53, + "learning_rate": 3.5534061387827936e-05, + "loss": 0.0355, + "step": 2668, + "task_loss": 0.009714646264910698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7680987807976954, + "compression_loss": 0.0, + "distillation_loss": 0.11870106309652328, + "epoch": 2.53, + "learning_rate": 3.552439727080495e-05, + "loss": 0.1244, + "step": 2669, + "task_loss": 0.1760600507259369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7681154046739646, + "compression_loss": 0.0, + "distillation_loss": 0.10362593829631805, + "epoch": 2.54, + "learning_rate": 3.5514731241974544e-05, + "loss": 0.1104, + "step": 2670, + "task_loss": 0.171173095703125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7681320227740432, + "compression_loss": 0.0, + "distillation_loss": 0.14736101031303406, + "epoch": 2.54, + "learning_rate": 3.5505063303092545e-05, + "loss": 0.142, + "step": 2671, + "task_loss": 0.09334026277065277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7681486350989352, + "compression_loss": 0.0, + "distillation_loss": 0.16119350492954254, + "epoch": 2.54, + "learning_rate": 3.549539345591521e-05, + "loss": 0.1542, + "step": 2672, + "task_loss": 0.09096268564462662 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.768165241649644, + "compression_loss": 0.0, + "distillation_loss": 0.22717413306236267, + "epoch": 2.54, + "learning_rate": 3.5485721702199104e-05, + "loss": 0.2191, + "step": 2673, + "task_loss": 0.1469288021326065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7681818424271735, + "compression_loss": 0.0, + "distillation_loss": 0.04752913862466812, + "epoch": 2.54, + "learning_rate": 3.547604804370116e-05, + "loss": 0.0435, + "step": 2674, + "task_loss": 0.007610570639371872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7681984374325271, + "compression_loss": 0.0, + "distillation_loss": 0.1687610149383545, + "epoch": 2.54, + "learning_rate": 3.5466372482178635e-05, + "loss": 0.1833, + "step": 2675, + "task_loss": 0.31412065029144287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7682150266667088, + "compression_loss": 0.0, + "distillation_loss": 0.1348022222518921, + "epoch": 2.54, + "learning_rate": 3.545669501938913e-05, + "loss": 0.1269, + "step": 2676, + "task_loss": 0.056100159883499146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7682316101307222, + "compression_loss": 0.0, + "distillation_loss": 0.20402267575263977, + "epoch": 2.54, + "learning_rate": 3.544701565709063e-05, + "loss": 0.2063, + "step": 2677, + "task_loss": 0.22638899087905884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7682481878255708, + "compression_loss": 0.0, + "distillation_loss": 0.14998939633369446, + "epoch": 2.54, + "learning_rate": 3.54373343970414e-05, + "loss": 0.143, + "step": 2678, + "task_loss": 0.08004312962293625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7682647597522585, + "compression_loss": 0.0, + "distillation_loss": 0.07659981399774551, + "epoch": 2.54, + "learning_rate": 3.542765124100014e-05, + "loss": 0.0728, + "step": 2679, + "task_loss": 0.03887277841567993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7682813259117889, + "compression_loss": 0.0, + "distillation_loss": 0.06985659897327423, + "epoch": 2.55, + "learning_rate": 3.541796619072579e-05, + "loss": 0.0637, + "step": 2680, + "task_loss": 0.008264170959591866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7682978863051656, + "compression_loss": 0.0, + "distillation_loss": 0.14076358079910278, + "epoch": 2.55, + "learning_rate": 3.540827924797772e-05, + "loss": 0.143, + "step": 2681, + "task_loss": 0.16294986009597778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7683144409333924, + "compression_loss": 0.0, + "distillation_loss": 0.05743217468261719, + "epoch": 2.55, + "learning_rate": 3.5398590414515586e-05, + "loss": 0.0619, + "step": 2682, + "task_loss": 0.10237649083137512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7683309897974729, + "compression_loss": 0.0, + "distillation_loss": 0.0506620779633522, + "epoch": 2.55, + "learning_rate": 3.5388899692099433e-05, + "loss": 0.0486, + "step": 2683, + "task_loss": 0.03040938824415207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7683475328984108, + "compression_loss": 0.0, + "distillation_loss": 0.042747851461172104, + "epoch": 2.55, + "learning_rate": 3.537920708248961e-05, + "loss": 0.062, + "step": 2684, + "task_loss": 0.23519286513328552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76836407023721, + "compression_loss": 0.0, + "distillation_loss": 0.13504308462142944, + "epoch": 2.55, + "learning_rate": 3.536951258744684e-05, + "loss": 0.1385, + "step": 2685, + "task_loss": 0.16998833417892456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7683806018148738, + "compression_loss": 0.0, + "distillation_loss": 0.16051191091537476, + "epoch": 2.55, + "learning_rate": 3.5359816208732164e-05, + "loss": 0.1578, + "step": 2686, + "task_loss": 0.13356226682662964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7683971276324061, + "compression_loss": 0.0, + "distillation_loss": 0.08552385866641998, + "epoch": 2.55, + "learning_rate": 3.535011794810698e-05, + "loss": 0.0798, + "step": 2687, + "task_loss": 0.027825014665722847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7684136476908106, + "compression_loss": 0.0, + "distillation_loss": 0.04348837211728096, + "epoch": 2.55, + "learning_rate": 3.5340417807333026e-05, + "loss": 0.0489, + "step": 2688, + "task_loss": 0.0971134603023529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7684301619910909, + "compression_loss": 0.0, + "distillation_loss": 0.03343481570482254, + "epoch": 2.55, + "learning_rate": 3.533071578817239e-05, + "loss": 0.0309, + "step": 2689, + "task_loss": 0.008328597992658615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7684466705342508, + "compression_loss": 0.0, + "distillation_loss": 0.09740258753299713, + "epoch": 2.55, + "learning_rate": 3.5321011892387455e-05, + "loss": 0.0948, + "step": 2690, + "task_loss": 0.07093527913093567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7684631733212938, + "compression_loss": 0.0, + "distillation_loss": 0.048360321670770645, + "epoch": 2.56, + "learning_rate": 3.5311306121741015e-05, + "loss": 0.0522, + "step": 2691, + "task_loss": 0.08722103387117386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7684796703532236, + "compression_loss": 0.0, + "distillation_loss": 0.021398166194558144, + "epoch": 2.56, + "learning_rate": 3.530159847799616e-05, + "loss": 0.0198, + "step": 2692, + "task_loss": 0.005231667309999466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7684961616310441, + "compression_loss": 0.0, + "distillation_loss": 0.2711659371852875, + "epoch": 2.56, + "learning_rate": 3.529188896291632e-05, + "loss": 0.257, + "step": 2693, + "task_loss": 0.1298050582408905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7685126471557587, + "compression_loss": 0.0, + "distillation_loss": 0.07550038397312164, + "epoch": 2.56, + "learning_rate": 3.5282177578265296e-05, + "loss": 0.0708, + "step": 2694, + "task_loss": 0.028521571308374405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7685291269283714, + "compression_loss": 0.0, + "distillation_loss": 0.17867624759674072, + "epoch": 2.56, + "learning_rate": 3.527246432580718e-05, + "loss": 0.1926, + "step": 2695, + "task_loss": 0.31801068782806396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7685456009498856, + "compression_loss": 0.0, + "distillation_loss": 0.1781276911497116, + "epoch": 2.56, + "learning_rate": 3.526274920730645e-05, + "loss": 0.1767, + "step": 2696, + "task_loss": 0.16398294270038605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7685620692213051, + "compression_loss": 0.0, + "distillation_loss": 0.0298934206366539, + "epoch": 2.56, + "learning_rate": 3.525303222452791e-05, + "loss": 0.0383, + "step": 2697, + "task_loss": 0.11393886804580688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7685785317436336, + "compression_loss": 0.0, + "distillation_loss": 0.03145752474665642, + "epoch": 2.56, + "learning_rate": 3.5243313379236685e-05, + "loss": 0.0375, + "step": 2698, + "task_loss": 0.09145695716142654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7685949885178748, + "compression_loss": 0.0, + "distillation_loss": 0.3614198565483093, + "epoch": 2.56, + "learning_rate": 3.5233592673198245e-05, + "loss": 0.3515, + "step": 2699, + "task_loss": 0.26197120547294617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7686114395450322, + "compression_loss": 0.0, + "distillation_loss": 0.06908755749464035, + "epoch": 2.56, + "learning_rate": 3.522387010817842e-05, + "loss": 0.0775, + "step": 2700, + "task_loss": 0.15282829105854034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7686278848261097, + "compression_loss": 0.0, + "distillation_loss": 0.07420150190591812, + "epoch": 2.57, + "learning_rate": 3.521414568594335e-05, + "loss": 0.0918, + "step": 2701, + "task_loss": 0.25066766142845154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7686443243621108, + "compression_loss": 0.0, + "distillation_loss": 0.2168622463941574, + "epoch": 2.57, + "learning_rate": 3.520441940825952e-05, + "loss": 0.2086, + "step": 2702, + "task_loss": 0.1346578747034073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7686607581540394, + "compression_loss": 0.0, + "distillation_loss": 0.2985137701034546, + "epoch": 2.57, + "learning_rate": 3.5194691276893755e-05, + "loss": 0.2939, + "step": 2703, + "task_loss": 0.2523634433746338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.768677186202899, + "compression_loss": 0.0, + "distillation_loss": 0.06703074276447296, + "epoch": 2.57, + "learning_rate": 3.518496129361323e-05, + "loss": 0.0638, + "step": 2704, + "task_loss": 0.03458258882164955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7686936085096935, + "compression_loss": 0.0, + "distillation_loss": 0.044759538024663925, + "epoch": 2.57, + "learning_rate": 3.5175229460185425e-05, + "loss": 0.0411, + "step": 2705, + "task_loss": 0.007678527384996414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7687100250754263, + "compression_loss": 0.0, + "distillation_loss": 0.03616587817668915, + "epoch": 2.57, + "learning_rate": 3.5165495778378196e-05, + "loss": 0.0336, + "step": 2706, + "task_loss": 0.010267160832881927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7687264359011012, + "compression_loss": 0.0, + "distillation_loss": 0.22257006168365479, + "epoch": 2.57, + "learning_rate": 3.51557602499597e-05, + "loss": 0.2122, + "step": 2707, + "task_loss": 0.1191282868385315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7687428409877219, + "compression_loss": 0.0, + "distillation_loss": 0.16167038679122925, + "epoch": 2.57, + "learning_rate": 3.514602287669844e-05, + "loss": 0.1738, + "step": 2708, + "task_loss": 0.2829251289367676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7687592403362921, + "compression_loss": 0.0, + "distillation_loss": 0.020230792462825775, + "epoch": 2.57, + "learning_rate": 3.5136283660363255e-05, + "loss": 0.0191, + "step": 2709, + "task_loss": 0.008950954303145409 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7687756339478155, + "compression_loss": 0.0, + "distillation_loss": 0.10566231608390808, + "epoch": 2.57, + "learning_rate": 3.5126542602723334e-05, + "loss": 0.1088, + "step": 2710, + "task_loss": 0.13719259202480316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7687920218232956, + "compression_loss": 0.0, + "distillation_loss": 0.16966648399829865, + "epoch": 2.57, + "learning_rate": 3.5116799705548175e-05, + "loss": 0.1624, + "step": 2711, + "task_loss": 0.09719309955835342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7688084039637363, + "compression_loss": 0.0, + "distillation_loss": 0.050816163420677185, + "epoch": 2.58, + "learning_rate": 3.510705497060762e-05, + "loss": 0.0547, + "step": 2712, + "task_loss": 0.08981159329414368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7688247803701412, + "compression_loss": 0.0, + "distillation_loss": 0.025936776772141457, + "epoch": 2.58, + "learning_rate": 3.509730839967187e-05, + "loss": 0.0244, + "step": 2713, + "task_loss": 0.010662872344255447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.768841151043514, + "compression_loss": 0.0, + "distillation_loss": 0.07976531982421875, + "epoch": 2.58, + "learning_rate": 3.508755999451141e-05, + "loss": 0.0739, + "step": 2714, + "task_loss": 0.020689811557531357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7688575159848584, + "compression_loss": 0.0, + "distillation_loss": 0.037483714520931244, + "epoch": 2.58, + "learning_rate": 3.50778097568971e-05, + "loss": 0.04, + "step": 2715, + "task_loss": 0.06217388063669205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.768873875195178, + "compression_loss": 0.0, + "distillation_loss": 0.13560466468334198, + "epoch": 2.58, + "learning_rate": 3.506805768860011e-05, + "loss": 0.1373, + "step": 2716, + "task_loss": 0.1521109640598297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7688902286754765, + "compression_loss": 0.0, + "distillation_loss": 0.0700618177652359, + "epoch": 2.58, + "learning_rate": 3.505830379139195e-05, + "loss": 0.0776, + "step": 2717, + "task_loss": 0.1449713408946991 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7689065764267576, + "compression_loss": 0.0, + "distillation_loss": 0.1059018149971962, + "epoch": 2.58, + "learning_rate": 3.504854806704446e-05, + "loss": 0.1082, + "step": 2718, + "task_loss": 0.12906883656978607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.768922918450025, + "compression_loss": 0.0, + "distillation_loss": 0.06906574964523315, + "epoch": 2.58, + "learning_rate": 3.503879051732983e-05, + "loss": 0.0792, + "step": 2719, + "task_loss": 0.16997401416301727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7689392547462826, + "compression_loss": 0.0, + "distillation_loss": 0.03843502700328827, + "epoch": 2.58, + "learning_rate": 3.502903114402055e-05, + "loss": 0.0541, + "step": 2720, + "task_loss": 0.1947551667690277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7689555853165336, + "compression_loss": 0.0, + "distillation_loss": 0.07670299708843231, + "epoch": 2.58, + "learning_rate": 3.501926994888946e-05, + "loss": 0.071, + "step": 2721, + "task_loss": 0.019545117393136024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.768971910161782, + "compression_loss": 0.0, + "distillation_loss": 0.2852790057659149, + "epoch": 2.58, + "learning_rate": 3.500950693370974e-05, + "loss": 0.2769, + "step": 2722, + "task_loss": 0.20112605392932892 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7689882292830315, + "compression_loss": 0.0, + "distillation_loss": 0.06108977273106575, + "epoch": 2.59, + "learning_rate": 3.499974210025487e-05, + "loss": 0.0675, + "step": 2723, + "task_loss": 0.12486658245325089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7690045426812856, + "compression_loss": 0.0, + "distillation_loss": 0.07272262871265411, + "epoch": 2.59, + "learning_rate": 3.4989975450298694e-05, + "loss": 0.0792, + "step": 2724, + "task_loss": 0.13739514350891113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7690208503575482, + "compression_loss": 0.0, + "distillation_loss": 0.13614577054977417, + "epoch": 2.59, + "learning_rate": 3.498020698561536e-05, + "loss": 0.1551, + "step": 2725, + "task_loss": 0.32593345642089844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7690371523128228, + "compression_loss": 0.0, + "distillation_loss": 0.0375310480594635, + "epoch": 2.59, + "learning_rate": 3.497043670797936e-05, + "loss": 0.0366, + "step": 2726, + "task_loss": 0.028382275253534317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7690534485481131, + "compression_loss": 0.0, + "distillation_loss": 0.04977725073695183, + "epoch": 2.59, + "learning_rate": 3.496066461916552e-05, + "loss": 0.0467, + "step": 2727, + "task_loss": 0.019020110368728638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7690697390644229, + "compression_loss": 0.0, + "distillation_loss": 0.1019824743270874, + "epoch": 2.59, + "learning_rate": 3.495089072094898e-05, + "loss": 0.0978, + "step": 2728, + "task_loss": 0.06048191338777542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7690860238627558, + "compression_loss": 0.0, + "distillation_loss": 0.021417532116174698, + "epoch": 2.59, + "learning_rate": 3.494111501510522e-05, + "loss": 0.0293, + "step": 2729, + "task_loss": 0.10010670870542526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7691023029441155, + "compression_loss": 0.0, + "distillation_loss": 0.04595291242003441, + "epoch": 2.59, + "learning_rate": 3.4931337503410034e-05, + "loss": 0.0618, + "step": 2730, + "task_loss": 0.20489037036895752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7691185763095056, + "compression_loss": 0.0, + "distillation_loss": 0.07521625608205795, + "epoch": 2.59, + "learning_rate": 3.4921558187639556e-05, + "loss": 0.0711, + "step": 2731, + "task_loss": 0.0338003970682621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76913484395993, + "compression_loss": 0.0, + "distillation_loss": 0.15600326657295227, + "epoch": 2.59, + "learning_rate": 3.491177706957026e-05, + "loss": 0.1498, + "step": 2732, + "task_loss": 0.0938263088464737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7691511058963922, + "compression_loss": 0.0, + "distillation_loss": 0.19931530952453613, + "epoch": 2.6, + "learning_rate": 3.490199415097892e-05, + "loss": 0.1947, + "step": 2733, + "task_loss": 0.1532270908355713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.769167362119896, + "compression_loss": 0.0, + "distillation_loss": 0.08603714406490326, + "epoch": 2.6, + "learning_rate": 3.489220943364266e-05, + "loss": 0.0839, + "step": 2734, + "task_loss": 0.06482724845409393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7691836126314449, + "compression_loss": 0.0, + "distillation_loss": 0.0459892675280571, + "epoch": 2.6, + "learning_rate": 3.488242291933891e-05, + "loss": 0.0517, + "step": 2735, + "task_loss": 0.10307285934686661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7691998574320427, + "compression_loss": 0.0, + "distillation_loss": 0.09086348861455917, + "epoch": 2.6, + "learning_rate": 3.487263460984544e-05, + "loss": 0.0902, + "step": 2736, + "task_loss": 0.08435454964637756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7692160965226931, + "compression_loss": 0.0, + "distillation_loss": 0.14168088138103485, + "epoch": 2.6, + "learning_rate": 3.486284450694035e-05, + "loss": 0.1393, + "step": 2737, + "task_loss": 0.11769342422485352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7692323299043997, + "compression_loss": 0.0, + "distillation_loss": 0.06477680802345276, + "epoch": 2.6, + "learning_rate": 3.485305261240205e-05, + "loss": 0.0647, + "step": 2738, + "task_loss": 0.06358174979686737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7692485575781663, + "compression_loss": 0.0, + "distillation_loss": 0.10200376808643341, + "epoch": 2.6, + "learning_rate": 3.4843258928009294e-05, + "loss": 0.1103, + "step": 2739, + "task_loss": 0.18466830253601074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7692647795449965, + "compression_loss": 0.0, + "distillation_loss": 0.12464478611946106, + "epoch": 2.6, + "learning_rate": 3.4833463455541146e-05, + "loss": 0.1179, + "step": 2740, + "task_loss": 0.05756617337465286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.769280995805894, + "compression_loss": 0.0, + "distillation_loss": 0.38097622990608215, + "epoch": 2.6, + "learning_rate": 3.4823666196777006e-05, + "loss": 0.3663, + "step": 2741, + "task_loss": 0.23438085615634918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7692972063618625, + "compression_loss": 0.0, + "distillation_loss": 0.09649477154016495, + "epoch": 2.6, + "learning_rate": 3.481386715349659e-05, + "loss": 0.0907, + "step": 2742, + "task_loss": 0.03846879303455353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7693134112139056, + "compression_loss": 0.0, + "distillation_loss": 0.049957577139139175, + "epoch": 2.6, + "learning_rate": 3.4804066327479934e-05, + "loss": 0.0601, + "step": 2743, + "task_loss": 0.15115505456924438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7693296103630272, + "compression_loss": 0.0, + "distillation_loss": 0.06845077127218246, + "epoch": 2.61, + "learning_rate": 3.4794263720507427e-05, + "loss": 0.071, + "step": 2744, + "task_loss": 0.09412077814340591 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7693458038102307, + "compression_loss": 0.0, + "distillation_loss": 0.09953967481851578, + "epoch": 2.61, + "learning_rate": 3.478445933435973e-05, + "loss": 0.1021, + "step": 2745, + "task_loss": 0.1255946159362793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.76936199155652, + "compression_loss": 0.0, + "distillation_loss": 0.04850146174430847, + "epoch": 2.61, + "learning_rate": 3.4774653170817884e-05, + "loss": 0.0495, + "step": 2746, + "task_loss": 0.0581977404654026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7693781736028985, + "compression_loss": 0.0, + "distillation_loss": 0.06344247609376907, + "epoch": 2.61, + "learning_rate": 3.4764845231663205e-05, + "loss": 0.0654, + "step": 2747, + "task_loss": 0.0828084945678711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7693943499503703, + "compression_loss": 0.0, + "distillation_loss": 0.16078636050224304, + "epoch": 2.61, + "learning_rate": 3.475503551867737e-05, + "loss": 0.1636, + "step": 2748, + "task_loss": 0.1886562705039978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7694105205999388, + "compression_loss": 0.0, + "distillation_loss": 0.12109316885471344, + "epoch": 2.61, + "learning_rate": 3.474522403364235e-05, + "loss": 0.1212, + "step": 2749, + "task_loss": 0.12180116772651672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7694266855526076, + "compression_loss": 0.0, + "distillation_loss": 0.11483351141214371, + "epoch": 2.61, + "learning_rate": 3.473541077834045e-05, + "loss": 0.1184, + "step": 2750, + "task_loss": 0.1501813381910324 + }, + { + "epoch": 2.61, + "eval_accuracy": 0.893348623853211, + "eval_loss": 0.4236961901187897, + "eval_runtime": 18.2679, + "eval_samples_per_second": 47.734, + "eval_steps_per_second": 5.967, + "step": 2750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7694428448093806, + "compression_loss": 0.0, + "distillation_loss": 0.11966343224048615, + "epoch": 2.61, + "learning_rate": 3.4725595754554295e-05, + "loss": 0.1252, + "step": 2751, + "task_loss": 0.17513278126716614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7694589983712615, + "compression_loss": 0.0, + "distillation_loss": 0.10641565918922424, + "epoch": 2.61, + "learning_rate": 3.471577896406683e-05, + "loss": 0.1126, + "step": 2752, + "task_loss": 0.1684948056936264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7694751462392538, + "compression_loss": 0.0, + "distillation_loss": 0.08618808537721634, + "epoch": 2.61, + "learning_rate": 3.470596040866133e-05, + "loss": 0.0846, + "step": 2753, + "task_loss": 0.07057564705610275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7694912884143613, + "compression_loss": 0.0, + "distillation_loss": 0.05224674940109253, + "epoch": 2.62, + "learning_rate": 3.4696140090121376e-05, + "loss": 0.0487, + "step": 2754, + "task_loss": 0.016593074426054955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7695074248975876, + "compression_loss": 0.0, + "distillation_loss": 0.1727830171585083, + "epoch": 2.62, + "learning_rate": 3.468631801023088e-05, + "loss": 0.168, + "step": 2755, + "task_loss": 0.12460935115814209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7695235556899365, + "compression_loss": 0.0, + "distillation_loss": 0.04192081093788147, + "epoch": 2.62, + "learning_rate": 3.467649417077406e-05, + "loss": 0.0526, + "step": 2756, + "task_loss": 0.14908772706985474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7695396807924116, + "compression_loss": 0.0, + "distillation_loss": 0.13823646306991577, + "epoch": 2.62, + "learning_rate": 3.466666857353547e-05, + "loss": 0.1403, + "step": 2757, + "task_loss": 0.15889990329742432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7695558002060165, + "compression_loss": 0.0, + "distillation_loss": 0.08324067294597626, + "epoch": 2.62, + "learning_rate": 3.465684122029999e-05, + "loss": 0.0865, + "step": 2758, + "task_loss": 0.11616663634777069 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7695719139317552, + "compression_loss": 0.0, + "distillation_loss": 0.18361113965511322, + "epoch": 2.62, + "learning_rate": 3.464701211285279e-05, + "loss": 0.1828, + "step": 2759, + "task_loss": 0.1751021444797516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7695880219706309, + "compression_loss": 0.0, + "distillation_loss": 0.038572054356336594, + "epoch": 2.62, + "learning_rate": 3.463718125297937e-05, + "loss": 0.0367, + "step": 2760, + "task_loss": 0.020345257595181465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7696041243236478, + "compression_loss": 0.0, + "distillation_loss": 0.11891628056764603, + "epoch": 2.62, + "learning_rate": 3.462734864246557e-05, + "loss": 0.1142, + "step": 2761, + "task_loss": 0.07189692556858063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7696202209918092, + "compression_loss": 0.0, + "distillation_loss": 0.0748853087425232, + "epoch": 2.62, + "learning_rate": 3.4617514283097524e-05, + "loss": 0.0825, + "step": 2762, + "task_loss": 0.1514107882976532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7696363119761188, + "compression_loss": 0.0, + "distillation_loss": 0.17334090173244476, + "epoch": 2.62, + "learning_rate": 3.4607678176661695e-05, + "loss": 0.1803, + "step": 2763, + "task_loss": 0.2433452010154724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7696523972775805, + "compression_loss": 0.0, + "distillation_loss": 0.08427559584379196, + "epoch": 2.62, + "learning_rate": 3.459784032494484e-05, + "loss": 0.1008, + "step": 2764, + "task_loss": 0.24964390695095062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7696684768971979, + "compression_loss": 0.0, + "distillation_loss": 0.04236322641372681, + "epoch": 2.63, + "learning_rate": 3.458800072973408e-05, + "loss": 0.0393, + "step": 2765, + "task_loss": 0.011962385848164558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7696845508359745, + "compression_loss": 0.0, + "distillation_loss": 0.04558961093425751, + "epoch": 2.63, + "learning_rate": 3.45781593928168e-05, + "loss": 0.051, + "step": 2766, + "task_loss": 0.09958979487419128 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697006190949143, + "compression_loss": 0.0, + "distillation_loss": 0.05856577679514885, + "epoch": 2.63, + "learning_rate": 3.4568316315980745e-05, + "loss": 0.0557, + "step": 2767, + "task_loss": 0.029869040474295616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697166816750208, + "compression_loss": 0.0, + "distillation_loss": 0.14246824383735657, + "epoch": 2.63, + "learning_rate": 3.455847150101395e-05, + "loss": 0.1396, + "step": 2768, + "task_loss": 0.11401065438985825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697327385772976, + "compression_loss": 0.0, + "distillation_loss": 0.08908317983150482, + "epoch": 2.63, + "learning_rate": 3.454862494970477e-05, + "loss": 0.1026, + "step": 2769, + "task_loss": 0.22423215210437775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697487898027486, + "compression_loss": 0.0, + "distillation_loss": 0.11774835735559464, + "epoch": 2.63, + "learning_rate": 3.4538776663841875e-05, + "loss": 0.1259, + "step": 2770, + "task_loss": 0.19950447976589203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697648353523773, + "compression_loss": 0.0, + "distillation_loss": 0.07487460970878601, + "epoch": 2.63, + "learning_rate": 3.452892664521427e-05, + "loss": 0.0828, + "step": 2771, + "task_loss": 0.15448282659053802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697808752271874, + "compression_loss": 0.0, + "distillation_loss": 0.13396842777729034, + "epoch": 2.63, + "learning_rate": 3.4519074895611244e-05, + "loss": 0.1337, + "step": 2772, + "task_loss": 0.13105513155460358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7697969094281827, + "compression_loss": 0.0, + "distillation_loss": 0.03769555687904358, + "epoch": 2.63, + "learning_rate": 3.4509221416822415e-05, + "loss": 0.0348, + "step": 2773, + "task_loss": 0.00889505073428154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7698129379563668, + "compression_loss": 0.0, + "distillation_loss": 0.1526089906692505, + "epoch": 2.63, + "learning_rate": 3.4499366210637725e-05, + "loss": 0.1464, + "step": 2774, + "task_loss": 0.09016816318035126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7698289608127434, + "compression_loss": 0.0, + "distillation_loss": 0.05517404526472092, + "epoch": 2.64, + "learning_rate": 3.4489509278847414e-05, + "loss": 0.0691, + "step": 2775, + "task_loss": 0.19403240084648132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7698449779983162, + "compression_loss": 0.0, + "distillation_loss": 0.14141535758972168, + "epoch": 2.64, + "learning_rate": 3.4479650623242036e-05, + "loss": 0.1409, + "step": 2776, + "task_loss": 0.13665583729743958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7698609895140888, + "compression_loss": 0.0, + "distillation_loss": 0.1828983724117279, + "epoch": 2.64, + "learning_rate": 3.446979024561246e-05, + "loss": 0.1816, + "step": 2777, + "task_loss": 0.1703716665506363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7698769953610649, + "compression_loss": 0.0, + "distillation_loss": 0.031704775989055634, + "epoch": 2.64, + "learning_rate": 3.44599281477499e-05, + "loss": 0.0384, + "step": 2778, + "task_loss": 0.098890021443367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7698929955402484, + "compression_loss": 0.0, + "distillation_loss": 0.03340402618050575, + "epoch": 2.64, + "learning_rate": 3.4450064331445814e-05, + "loss": 0.0319, + "step": 2779, + "task_loss": 0.018638523295521736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7699089900526426, + "compression_loss": 0.0, + "distillation_loss": 0.040086835622787476, + "epoch": 2.64, + "learning_rate": 3.444019879849204e-05, + "loss": 0.048, + "step": 2780, + "task_loss": 0.11879396438598633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7699249788992515, + "compression_loss": 0.0, + "distillation_loss": 0.05169200897216797, + "epoch": 2.64, + "learning_rate": 3.443033155068069e-05, + "loss": 0.0542, + "step": 2781, + "task_loss": 0.07657624781131744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7699409620810785, + "compression_loss": 0.0, + "distillation_loss": 0.20420724153518677, + "epoch": 2.64, + "learning_rate": 3.442046258980419e-05, + "loss": 0.1945, + "step": 2782, + "task_loss": 0.10754656046628952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7699569395991277, + "compression_loss": 0.0, + "distillation_loss": 0.1341618299484253, + "epoch": 2.64, + "learning_rate": 3.4410591917655296e-05, + "loss": 0.1326, + "step": 2783, + "task_loss": 0.1190015971660614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7699729114544024, + "compression_loss": 0.0, + "distillation_loss": 0.09340974688529968, + "epoch": 2.64, + "learning_rate": 3.4400719536027056e-05, + "loss": 0.0875, + "step": 2784, + "task_loss": 0.03399471938610077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7699888776479064, + "compression_loss": 0.0, + "distillation_loss": 0.19588154554367065, + "epoch": 2.64, + "learning_rate": 3.4390845446712836e-05, + "loss": 0.187, + "step": 2785, + "task_loss": 0.10719942301511765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7700048381806435, + "compression_loss": 0.0, + "distillation_loss": 0.11635538935661316, + "epoch": 2.65, + "learning_rate": 3.438096965150632e-05, + "loss": 0.1287, + "step": 2786, + "task_loss": 0.2399107664823532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7700207930536171, + "compression_loss": 0.0, + "distillation_loss": 0.17617712914943695, + "epoch": 2.65, + "learning_rate": 3.4371092152201485e-05, + "loss": 0.1735, + "step": 2787, + "task_loss": 0.14985665678977966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7700367422678311, + "compression_loss": 0.0, + "distillation_loss": 0.06373878568410873, + "epoch": 2.65, + "learning_rate": 3.4361212950592624e-05, + "loss": 0.0669, + "step": 2788, + "task_loss": 0.09571292251348495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7700526858242893, + "compression_loss": 0.0, + "distillation_loss": 0.05058780312538147, + "epoch": 2.65, + "learning_rate": 3.435133204847435e-05, + "loss": 0.0469, + "step": 2789, + "task_loss": 0.013910744339227676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7700686237239951, + "compression_loss": 0.0, + "distillation_loss": 0.10628429800271988, + "epoch": 2.65, + "learning_rate": 3.4341449447641575e-05, + "loss": 0.1091, + "step": 2790, + "task_loss": 0.13484588265419006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7700845559679523, + "compression_loss": 0.0, + "distillation_loss": 0.024575646966695786, + "epoch": 2.65, + "learning_rate": 3.433156514988951e-05, + "loss": 0.0287, + "step": 2791, + "task_loss": 0.06540574878454208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7701004825571647, + "compression_loss": 0.0, + "distillation_loss": 0.11763833463191986, + "epoch": 2.65, + "learning_rate": 3.432167915701371e-05, + "loss": 0.1207, + "step": 2792, + "task_loss": 0.14855128526687622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7701164034926358, + "compression_loss": 0.0, + "distillation_loss": 0.04107709601521492, + "epoch": 2.65, + "learning_rate": 3.431179147080999e-05, + "loss": 0.0611, + "step": 2793, + "task_loss": 0.24124276638031006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7701323187753693, + "compression_loss": 0.0, + "distillation_loss": 0.03191131353378296, + "epoch": 2.65, + "learning_rate": 3.4301902093074504e-05, + "loss": 0.0301, + "step": 2794, + "task_loss": 0.013331804424524307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.770148228406369, + "compression_loss": 0.0, + "distillation_loss": 0.029843294993042946, + "epoch": 2.65, + "learning_rate": 3.42920110256037e-05, + "loss": 0.0277, + "step": 2795, + "task_loss": 0.008593736216425896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7701641323866384, + "compression_loss": 0.0, + "distillation_loss": 0.05394424870610237, + "epoch": 2.66, + "learning_rate": 3.428211827019434e-05, + "loss": 0.0504, + "step": 2796, + "task_loss": 0.018413875252008438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7701800307171814, + "compression_loss": 0.0, + "distillation_loss": 0.12455835193395615, + "epoch": 2.66, + "learning_rate": 3.42722238286435e-05, + "loss": 0.1222, + "step": 2797, + "task_loss": 0.10132066905498505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7701959233990016, + "compression_loss": 0.0, + "distillation_loss": 0.036928869783878326, + "epoch": 2.66, + "learning_rate": 3.426232770274855e-05, + "loss": 0.0458, + "step": 2798, + "task_loss": 0.12519891560077667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7702118104331026, + "compression_loss": 0.0, + "distillation_loss": 0.03775065019726753, + "epoch": 2.66, + "learning_rate": 3.4252429894307154e-05, + "loss": 0.0426, + "step": 2799, + "task_loss": 0.08613856136798859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7702276918204881, + "compression_loss": 0.0, + "distillation_loss": 0.14951038360595703, + "epoch": 2.66, + "learning_rate": 3.424253040511731e-05, + "loss": 0.1439, + "step": 2800, + "task_loss": 0.09322544187307358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.770243567562162, + "compression_loss": 0.0, + "distillation_loss": 0.14305658638477325, + "epoch": 2.66, + "learning_rate": 3.4232629236977316e-05, + "loss": 0.1513, + "step": 2801, + "task_loss": 0.2257470041513443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7702594376591276, + "compression_loss": 0.0, + "distillation_loss": 0.08632157742977142, + "epoch": 2.66, + "learning_rate": 3.4222726391685746e-05, + "loss": 0.0854, + "step": 2802, + "task_loss": 0.07660988718271255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7702753021123889, + "compression_loss": 0.0, + "distillation_loss": 0.12226057052612305, + "epoch": 2.66, + "learning_rate": 3.42128218710415e-05, + "loss": 0.1323, + "step": 2803, + "task_loss": 0.22227300703525543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7702911609229496, + "compression_loss": 0.0, + "distillation_loss": 0.05076700448989868, + "epoch": 2.66, + "learning_rate": 3.420291567684381e-05, + "loss": 0.0488, + "step": 2804, + "task_loss": 0.03137904405593872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7703070140918131, + "compression_loss": 0.0, + "distillation_loss": 0.13917645812034607, + "epoch": 2.66, + "learning_rate": 3.419300781089216e-05, + "loss": 0.1431, + "step": 2805, + "task_loss": 0.17885732650756836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7703228616199832, + "compression_loss": 0.0, + "distillation_loss": 0.047890555113554, + "epoch": 2.66, + "learning_rate": 3.418309827498637e-05, + "loss": 0.053, + "step": 2806, + "task_loss": 0.09856240451335907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7703387035084637, + "compression_loss": 0.0, + "distillation_loss": 0.019215038046240807, + "epoch": 2.67, + "learning_rate": 3.4173187070926546e-05, + "loss": 0.0224, + "step": 2807, + "task_loss": 0.05135765299201012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7703545397582582, + "compression_loss": 0.0, + "distillation_loss": 0.07260240614414215, + "epoch": 2.67, + "learning_rate": 3.4163274200513116e-05, + "loss": 0.0875, + "step": 2808, + "task_loss": 0.221457377076149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7703703703703704, + "compression_loss": 0.0, + "distillation_loss": 0.06960202753543854, + "epoch": 2.67, + "learning_rate": 3.415335966554679e-05, + "loss": 0.0853, + "step": 2809, + "task_loss": 0.22612322866916656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7703861953458039, + "compression_loss": 0.0, + "distillation_loss": 0.0772671177983284, + "epoch": 2.67, + "learning_rate": 3.414344346782861e-05, + "loss": 0.0818, + "step": 2810, + "task_loss": 0.12265747040510178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7704020146855625, + "compression_loss": 0.0, + "distillation_loss": 0.16112419962882996, + "epoch": 2.67, + "learning_rate": 3.413352560915988e-05, + "loss": 0.1535, + "step": 2811, + "task_loss": 0.0847577229142189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7704178283906499, + "compression_loss": 0.0, + "distillation_loss": 0.23741042613983154, + "epoch": 2.67, + "learning_rate": 3.412360609134223e-05, + "loss": 0.2338, + "step": 2812, + "task_loss": 0.20153185725212097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7704336364620695, + "compression_loss": 0.0, + "distillation_loss": 0.10814424604177475, + "epoch": 2.67, + "learning_rate": 3.411368491617761e-05, + "loss": 0.1036, + "step": 2813, + "task_loss": 0.06266738474369049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7704494389008254, + "compression_loss": 0.0, + "distillation_loss": 0.06540797650814056, + "epoch": 2.67, + "learning_rate": 3.410376208546822e-05, + "loss": 0.0606, + "step": 2814, + "task_loss": 0.017390571534633636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7704652357079209, + "compression_loss": 0.0, + "distillation_loss": 0.033819716423749924, + "epoch": 2.67, + "learning_rate": 3.409383760101661e-05, + "loss": 0.0382, + "step": 2815, + "task_loss": 0.07716642320156097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.77048102688436, + "compression_loss": 0.0, + "distillation_loss": 0.20797955989837646, + "epoch": 2.67, + "learning_rate": 3.4083911464625596e-05, + "loss": 0.1993, + "step": 2816, + "task_loss": 0.12118206918239594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7704968124311462, + "compression_loss": 0.0, + "distillation_loss": 0.13762623071670532, + "epoch": 2.68, + "learning_rate": 3.407398367809832e-05, + "loss": 0.1356, + "step": 2817, + "task_loss": 0.11715132743120193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7705125923492833, + "compression_loss": 0.0, + "distillation_loss": 0.14595434069633484, + "epoch": 2.68, + "learning_rate": 3.406405424323821e-05, + "loss": 0.1374, + "step": 2818, + "task_loss": 0.06045624241232872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7705283666397748, + "compression_loss": 0.0, + "distillation_loss": 0.030591927468776703, + "epoch": 2.68, + "learning_rate": 3.4054123161848995e-05, + "loss": 0.0285, + "step": 2819, + "task_loss": 0.010143185034394264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7705441353036244, + "compression_loss": 0.0, + "distillation_loss": 0.21557293832302094, + "epoch": 2.68, + "learning_rate": 3.4044190435734695e-05, + "loss": 0.2094, + "step": 2820, + "task_loss": 0.15403705835342407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.770559898341836, + "compression_loss": 0.0, + "distillation_loss": 0.04488247632980347, + "epoch": 2.68, + "learning_rate": 3.403425606669965e-05, + "loss": 0.0427, + "step": 2821, + "task_loss": 0.023530708625912666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7705756557554131, + "compression_loss": 0.0, + "distillation_loss": 0.1816938817501068, + "epoch": 2.68, + "learning_rate": 3.4024320056548475e-05, + "loss": 0.174, + "step": 2822, + "task_loss": 0.10455642640590668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7705914075453595, + "compression_loss": 0.0, + "distillation_loss": 0.03927179425954819, + "epoch": 2.68, + "learning_rate": 3.401438240708611e-05, + "loss": 0.0501, + "step": 2823, + "task_loss": 0.14788560569286346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7706071537126787, + "compression_loss": 0.0, + "distillation_loss": 0.1478666365146637, + "epoch": 2.68, + "learning_rate": 3.400444312011776e-05, + "loss": 0.1539, + "step": 2824, + "task_loss": 0.20857636630535126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7706228942583746, + "compression_loss": 0.0, + "distillation_loss": 0.1341654360294342, + "epoch": 2.68, + "learning_rate": 3.399450219744896e-05, + "loss": 0.1379, + "step": 2825, + "task_loss": 0.17152555286884308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7706386291834507, + "compression_loss": 0.0, + "distillation_loss": 0.09930291771888733, + "epoch": 2.68, + "learning_rate": 3.3984559640885505e-05, + "loss": 0.1029, + "step": 2826, + "task_loss": 0.1356475055217743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7706543584889108, + "compression_loss": 0.0, + "distillation_loss": 0.049742672592401505, + "epoch": 2.68, + "learning_rate": 3.3974615452233526e-05, + "loss": 0.0463, + "step": 2827, + "task_loss": 0.015741702169179916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7706700821757586, + "compression_loss": 0.0, + "distillation_loss": 0.047701530158519745, + "epoch": 2.69, + "learning_rate": 3.396466963329944e-05, + "loss": 0.0506, + "step": 2828, + "task_loss": 0.0768967792391777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7706858002449977, + "compression_loss": 0.0, + "distillation_loss": 0.14289794862270355, + "epoch": 2.69, + "learning_rate": 3.395472218588992e-05, + "loss": 0.1379, + "step": 2829, + "task_loss": 0.09291721880435944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707015126976318, + "compression_loss": 0.0, + "distillation_loss": 0.11319032311439514, + "epoch": 2.69, + "learning_rate": 3.394477311181201e-05, + "loss": 0.1067, + "step": 2830, + "task_loss": 0.04876325652003288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707172195346645, + "compression_loss": 0.0, + "distillation_loss": 0.14416098594665527, + "epoch": 2.69, + "learning_rate": 3.393482241287297e-05, + "loss": 0.1571, + "step": 2831, + "task_loss": 0.2737889587879181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707329207570998, + "compression_loss": 0.0, + "distillation_loss": 0.1706182062625885, + "epoch": 2.69, + "learning_rate": 3.392487009088041e-05, + "loss": 0.1614, + "step": 2832, + "task_loss": 0.07805980741977692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707486163659409, + "compression_loss": 0.0, + "distillation_loss": 0.14137789607048035, + "epoch": 2.69, + "learning_rate": 3.391491614764222e-05, + "loss": 0.1342, + "step": 2833, + "task_loss": 0.07003333419561386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707643063621918, + "compression_loss": 0.0, + "distillation_loss": 0.09552104771137238, + "epoch": 2.69, + "learning_rate": 3.390496058496657e-05, + "loss": 0.1013, + "step": 2834, + "task_loss": 0.15300557017326355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707799907468562, + "compression_loss": 0.0, + "distillation_loss": 0.1020299419760704, + "epoch": 2.69, + "learning_rate": 3.3895003404661955e-05, + "loss": 0.1235, + "step": 2835, + "task_loss": 0.3162316679954529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7707956695209377, + "compression_loss": 0.0, + "distillation_loss": 0.10217801481485367, + "epoch": 2.69, + "learning_rate": 3.3885044608537125e-05, + "loss": 0.1284, + "step": 2836, + "task_loss": 0.3640749156475067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7708113426854399, + "compression_loss": 0.0, + "distillation_loss": 0.09118107706308365, + "epoch": 2.69, + "learning_rate": 3.387508419840115e-05, + "loss": 0.0858, + "step": 2837, + "task_loss": 0.03702104091644287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7708270102413666, + "compression_loss": 0.0, + "distillation_loss": 0.123194620013237, + "epoch": 2.7, + "learning_rate": 3.386512217606339e-05, + "loss": 0.1161, + "step": 2838, + "task_loss": 0.05249645560979843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7708426721897215, + "compression_loss": 0.0, + "distillation_loss": 0.0807013064622879, + "epoch": 2.7, + "learning_rate": 3.385515854333349e-05, + "loss": 0.0786, + "step": 2839, + "task_loss": 0.06016937270760536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7708583285315082, + "compression_loss": 0.0, + "distillation_loss": 0.08974245935678482, + "epoch": 2.7, + "learning_rate": 3.38451933020214e-05, + "loss": 0.0873, + "step": 2840, + "task_loss": 0.0655125230550766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7708739792677303, + "compression_loss": 0.0, + "distillation_loss": 0.18268810212612152, + "epoch": 2.7, + "learning_rate": 3.383522645393734e-05, + "loss": 0.1749, + "step": 2841, + "task_loss": 0.10475851595401764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7708896243993917, + "compression_loss": 0.0, + "distillation_loss": 0.23169159889221191, + "epoch": 2.7, + "learning_rate": 3.3825258000891846e-05, + "loss": 0.2429, + "step": 2842, + "task_loss": 0.34396815299987793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7709052639274959, + "compression_loss": 0.0, + "distillation_loss": 0.13757774233818054, + "epoch": 2.7, + "learning_rate": 3.381528794469574e-05, + "loss": 0.1312, + "step": 2843, + "task_loss": 0.07409615814685822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7709208978530467, + "compression_loss": 0.0, + "distillation_loss": 0.029491236433386803, + "epoch": 2.7, + "learning_rate": 3.380531628716012e-05, + "loss": 0.0344, + "step": 2844, + "task_loss": 0.07904480397701263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7709365261770478, + "compression_loss": 0.0, + "distillation_loss": 0.08474864065647125, + "epoch": 2.7, + "learning_rate": 3.3795343030096384e-05, + "loss": 0.0934, + "step": 2845, + "task_loss": 0.17167925834655762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7709521489005028, + "compression_loss": 0.0, + "distillation_loss": 0.03882834315299988, + "epoch": 2.7, + "learning_rate": 3.3785368175316226e-05, + "loss": 0.0392, + "step": 2846, + "task_loss": 0.042411088943481445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7709677660244153, + "compression_loss": 0.0, + "distillation_loss": 0.04816707968711853, + "epoch": 2.7, + "learning_rate": 3.377539172463164e-05, + "loss": 0.0444, + "step": 2847, + "task_loss": 0.010003169998526573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7709833775497892, + "compression_loss": 0.0, + "distillation_loss": 0.09748025238513947, + "epoch": 2.7, + "learning_rate": 3.376541367985488e-05, + "loss": 0.1011, + "step": 2848, + "task_loss": 0.13417553901672363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.770998983477628, + "compression_loss": 0.0, + "distillation_loss": 0.032245099544525146, + "epoch": 2.71, + "learning_rate": 3.3755434042798506e-05, + "loss": 0.0298, + "step": 2849, + "task_loss": 0.008108781650662422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7710145838089355, + "compression_loss": 0.0, + "distillation_loss": 0.028734374791383743, + "epoch": 2.71, + "learning_rate": 3.374545281527538e-05, + "loss": 0.0302, + "step": 2850, + "task_loss": 0.043056122958660126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7710301785447152, + "compression_loss": 0.0, + "distillation_loss": 0.04646429419517517, + "epoch": 2.71, + "learning_rate": 3.3735469999098615e-05, + "loss": 0.0477, + "step": 2851, + "task_loss": 0.05848044529557228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7710457676859711, + "compression_loss": 0.0, + "distillation_loss": 0.09431092441082001, + "epoch": 2.71, + "learning_rate": 3.372548559608166e-05, + "loss": 0.101, + "step": 2852, + "task_loss": 0.16074809432029724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7710613512337066, + "compression_loss": 0.0, + "distillation_loss": 0.042417608201503754, + "epoch": 2.71, + "learning_rate": 3.3715499608038234e-05, + "loss": 0.0391, + "step": 2853, + "task_loss": 0.009729046374559402 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7710769291889255, + "compression_loss": 0.0, + "distillation_loss": 0.10295215994119644, + "epoch": 2.71, + "learning_rate": 3.370551203678233e-05, + "loss": 0.0966, + "step": 2854, + "task_loss": 0.03992331400513649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7710925015526314, + "compression_loss": 0.0, + "distillation_loss": 0.026899177581071854, + "epoch": 2.71, + "learning_rate": 3.369552288412822e-05, + "loss": 0.0407, + "step": 2855, + "task_loss": 0.1650460809469223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7711080683258281, + "compression_loss": 0.0, + "distillation_loss": 0.09420739114284515, + "epoch": 2.71, + "learning_rate": 3.368553215189052e-05, + "loss": 0.0965, + "step": 2856, + "task_loss": 0.1171763613820076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7711236295095193, + "compression_loss": 0.0, + "distillation_loss": 0.0723368227481842, + "epoch": 2.71, + "learning_rate": 3.367553984188407e-05, + "loss": 0.0738, + "step": 2857, + "task_loss": 0.08675511181354523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7711391851047085, + "compression_loss": 0.0, + "distillation_loss": 0.10755441337823868, + "epoch": 2.71, + "learning_rate": 3.366554595592402e-05, + "loss": 0.103, + "step": 2858, + "task_loss": 0.06218289956450462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7711547351123995, + "compression_loss": 0.0, + "distillation_loss": 0.11410737037658691, + "epoch": 2.72, + "learning_rate": 3.365555049582582e-05, + "loss": 0.1191, + "step": 2859, + "task_loss": 0.16380539536476135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771170279533596, + "compression_loss": 0.0, + "distillation_loss": 0.09805843234062195, + "epoch": 2.72, + "learning_rate": 3.364555346340518e-05, + "loss": 0.0936, + "step": 2860, + "task_loss": 0.053569987416267395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7711858183693016, + "compression_loss": 0.0, + "distillation_loss": 0.11211707442998886, + "epoch": 2.72, + "learning_rate": 3.3635554860478126e-05, + "loss": 0.1062, + "step": 2861, + "task_loss": 0.053317755460739136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7712013516205201, + "compression_loss": 0.0, + "distillation_loss": 0.06892388314008713, + "epoch": 2.72, + "learning_rate": 3.362555468886093e-05, + "loss": 0.0627, + "step": 2862, + "task_loss": 0.006360730156302452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771216879288255, + "compression_loss": 0.0, + "distillation_loss": 0.10119855403900146, + "epoch": 2.72, + "learning_rate": 3.361555295037019e-05, + "loss": 0.0967, + "step": 2863, + "task_loss": 0.05646238476037979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7712324013735102, + "compression_loss": 0.0, + "distillation_loss": 0.10500767827033997, + "epoch": 2.72, + "learning_rate": 3.360554964682276e-05, + "loss": 0.1036, + "step": 2864, + "task_loss": 0.09080617129802704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7712479178772893, + "compression_loss": 0.0, + "distillation_loss": 0.025210561230778694, + "epoch": 2.72, + "learning_rate": 3.359554478003579e-05, + "loss": 0.0285, + "step": 2865, + "task_loss": 0.0582006499171257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771263428800596, + "compression_loss": 0.0, + "distillation_loss": 0.16816392540931702, + "epoch": 2.72, + "learning_rate": 3.358553835182673e-05, + "loss": 0.1681, + "step": 2866, + "task_loss": 0.16722428798675537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7712789341444338, + "compression_loss": 0.0, + "distillation_loss": 0.054264236241579056, + "epoch": 2.72, + "learning_rate": 3.357553036401326e-05, + "loss": 0.0504, + "step": 2867, + "task_loss": 0.015533113852143288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7712944339098066, + "compression_loss": 0.0, + "distillation_loss": 0.11492185294628143, + "epoch": 2.72, + "learning_rate": 3.356552081841341e-05, + "loss": 0.1207, + "step": 2868, + "task_loss": 0.1728590577840805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771309928097718, + "compression_loss": 0.0, + "distillation_loss": 0.19074279069900513, + "epoch": 2.72, + "learning_rate": 3.355550971684545e-05, + "loss": 0.191, + "step": 2869, + "task_loss": 0.19339722394943237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7713254167091717, + "compression_loss": 0.0, + "distillation_loss": 0.09511333703994751, + "epoch": 2.73, + "learning_rate": 3.3545497061127946e-05, + "loss": 0.095, + "step": 2870, + "task_loss": 0.0938149243593216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7713408997451714, + "compression_loss": 0.0, + "distillation_loss": 0.03391757979989052, + "epoch": 2.73, + "learning_rate": 3.353548285307975e-05, + "loss": 0.0316, + "step": 2871, + "task_loss": 0.011141767725348473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7713563772067207, + "compression_loss": 0.0, + "distillation_loss": 0.04854762554168701, + "epoch": 2.73, + "learning_rate": 3.352546709451998e-05, + "loss": 0.076, + "step": 2872, + "task_loss": 0.3228394389152527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7713718490948234, + "compression_loss": 0.0, + "distillation_loss": 0.07784444838762283, + "epoch": 2.73, + "learning_rate": 3.351544978726805e-05, + "loss": 0.0753, + "step": 2873, + "task_loss": 0.05243542417883873 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771387315410483, + "compression_loss": 0.0, + "distillation_loss": 0.050782375037670135, + "epoch": 2.73, + "learning_rate": 3.350543093314366e-05, + "loss": 0.0468, + "step": 2874, + "task_loss": 0.01137268915772438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7714027761547034, + "compression_loss": 0.0, + "distillation_loss": 0.1053369790315628, + "epoch": 2.73, + "learning_rate": 3.349541053396678e-05, + "loss": 0.109, + "step": 2875, + "task_loss": 0.14205503463745117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7714182313284882, + "compression_loss": 0.0, + "distillation_loss": 0.12449757009744644, + "epoch": 2.73, + "learning_rate": 3.348538859155766e-05, + "loss": 0.1181, + "step": 2876, + "task_loss": 0.060163747519254684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771433680932841, + "compression_loss": 0.0, + "distillation_loss": 0.16609831154346466, + "epoch": 2.73, + "learning_rate": 3.347536510773684e-05, + "loss": 0.1762, + "step": 2877, + "task_loss": 0.2674504816532135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7714491249687656, + "compression_loss": 0.0, + "distillation_loss": 0.015941990539431572, + "epoch": 2.73, + "learning_rate": 3.346534008432513e-05, + "loss": 0.0148, + "step": 2878, + "task_loss": 0.004297057166695595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7714645634372655, + "compression_loss": 0.0, + "distillation_loss": 0.06580062955617905, + "epoch": 2.73, + "learning_rate": 3.3455313523143615e-05, + "loss": 0.0644, + "step": 2879, + "task_loss": 0.05226878076791763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7714799963393447, + "compression_loss": 0.0, + "distillation_loss": 0.041852548718452454, + "epoch": 2.74, + "learning_rate": 3.3445285426013685e-05, + "loss": 0.0441, + "step": 2880, + "task_loss": 0.0639595314860344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7714954236760067, + "compression_loss": 0.0, + "distillation_loss": 0.14709192514419556, + "epoch": 2.74, + "learning_rate": 3.343525579475698e-05, + "loss": 0.1519, + "step": 2881, + "task_loss": 0.19560851156711578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771510845448255, + "compression_loss": 0.0, + "distillation_loss": 0.02442905679345131, + "epoch": 2.74, + "learning_rate": 3.342522463119543e-05, + "loss": 0.0226, + "step": 2882, + "task_loss": 0.00585402175784111 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7715262616570936, + "compression_loss": 0.0, + "distillation_loss": 0.08210425078868866, + "epoch": 2.74, + "learning_rate": 3.341519193715127e-05, + "loss": 0.087, + "step": 2883, + "task_loss": 0.13087433576583862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771541672303526, + "compression_loss": 0.0, + "distillation_loss": 0.021984929218888283, + "epoch": 2.74, + "learning_rate": 3.340515771444695e-05, + "loss": 0.0207, + "step": 2884, + "task_loss": 0.008725512772798538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771557077388556, + "compression_loss": 0.0, + "distillation_loss": 0.1168016642332077, + "epoch": 2.74, + "learning_rate": 3.3395121964905265e-05, + "loss": 0.1205, + "step": 2885, + "task_loss": 0.15377360582351685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771572476913187, + "compression_loss": 0.0, + "distillation_loss": 0.16023914515972137, + "epoch": 2.74, + "learning_rate": 3.338508469034922e-05, + "loss": 0.1587, + "step": 2886, + "task_loss": 0.14454258978366852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7715878708784231, + "compression_loss": 0.0, + "distillation_loss": 0.05907361954450607, + "epoch": 2.74, + "learning_rate": 3.3375045892602176e-05, + "loss": 0.0606, + "step": 2887, + "task_loss": 0.07458890229463577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716032592852676, + "compression_loss": 0.0, + "distillation_loss": 0.0668545514345169, + "epoch": 2.74, + "learning_rate": 3.3365005573487706e-05, + "loss": 0.0705, + "step": 2888, + "task_loss": 0.10356783866882324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716186421347244, + "compression_loss": 0.0, + "distillation_loss": 0.052939146757125854, + "epoch": 2.74, + "learning_rate": 3.335496373482969e-05, + "loss": 0.0487, + "step": 2889, + "task_loss": 0.010425196960568428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716340194277972, + "compression_loss": 0.0, + "distillation_loss": 0.022259045392274857, + "epoch": 2.74, + "learning_rate": 3.334492037845227e-05, + "loss": 0.0272, + "step": 2890, + "task_loss": 0.07185941934585571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716493911654896, + "compression_loss": 0.0, + "distillation_loss": 0.05123762786388397, + "epoch": 2.75, + "learning_rate": 3.333487550617987e-05, + "loss": 0.0698, + "step": 2891, + "task_loss": 0.2367219626903534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716647573488052, + "compression_loss": 0.0, + "distillation_loss": 0.18512699007987976, + "epoch": 2.75, + "learning_rate": 3.332482911983721e-05, + "loss": 0.1761, + "step": 2892, + "task_loss": 0.09492430090904236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716801179787479, + "compression_loss": 0.0, + "distillation_loss": 0.18022358417510986, + "epoch": 2.75, + "learning_rate": 3.331478122124924e-05, + "loss": 0.1795, + "step": 2893, + "task_loss": 0.1729545146226883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7716954730563212, + "compression_loss": 0.0, + "distillation_loss": 0.020589305087924004, + "epoch": 2.75, + "learning_rate": 3.330473181224121e-05, + "loss": 0.0191, + "step": 2894, + "task_loss": 0.005505530163645744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7717108225825289, + "compression_loss": 0.0, + "distillation_loss": 0.030846502631902695, + "epoch": 2.75, + "learning_rate": 3.3294680894638655e-05, + "loss": 0.0285, + "step": 2895, + "task_loss": 0.0070547014474868774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7717261665583746, + "compression_loss": 0.0, + "distillation_loss": 0.020820213481783867, + "epoch": 2.75, + "learning_rate": 3.328462847026736e-05, + "loss": 0.0287, + "step": 2896, + "task_loss": 0.09968282282352448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7717415049848619, + "compression_loss": 0.0, + "distillation_loss": 0.09029317647218704, + "epoch": 2.75, + "learning_rate": 3.327457454095342e-05, + "loss": 0.0924, + "step": 2897, + "task_loss": 0.11134281009435654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7717568378629946, + "compression_loss": 0.0, + "distillation_loss": 0.06361924111843109, + "epoch": 2.75, + "learning_rate": 3.3264519108523154e-05, + "loss": 0.0595, + "step": 2898, + "task_loss": 0.022628160193562508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7717721651937766, + "compression_loss": 0.0, + "distillation_loss": 0.04663745313882828, + "epoch": 2.75, + "learning_rate": 3.3254462174803186e-05, + "loss": 0.048, + "step": 2899, + "task_loss": 0.060523584485054016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7717874869782111, + "compression_loss": 0.0, + "distillation_loss": 0.23908516764640808, + "epoch": 2.75, + "learning_rate": 3.324440374162041e-05, + "loss": 0.2375, + "step": 2900, + "task_loss": 0.22366894781589508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718028032173022, + "compression_loss": 0.0, + "distillation_loss": 0.07372735440731049, + "epoch": 2.75, + "learning_rate": 3.323434381080199e-05, + "loss": 0.0802, + "step": 2901, + "task_loss": 0.13813573122024536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718181139120533, + "compression_loss": 0.0, + "distillation_loss": 0.04444555193185806, + "epoch": 2.76, + "learning_rate": 3.322428238417537e-05, + "loss": 0.0548, + "step": 2902, + "task_loss": 0.1481703817844391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718334190634683, + "compression_loss": 0.0, + "distillation_loss": 0.10492336750030518, + "epoch": 2.76, + "learning_rate": 3.321421946356823e-05, + "loss": 0.0998, + "step": 2903, + "task_loss": 0.05350606143474579 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718487186725508, + "compression_loss": 0.0, + "distillation_loss": 0.043171461671590805, + "epoch": 2.76, + "learning_rate": 3.320415505080858e-05, + "loss": 0.0399, + "step": 2904, + "task_loss": 0.010258587077260017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718640127403044, + "compression_loss": 0.0, + "distillation_loss": 0.09338559955358505, + "epoch": 2.76, + "learning_rate": 3.3194089147724644e-05, + "loss": 0.0905, + "step": 2905, + "task_loss": 0.0642678365111351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718793012677327, + "compression_loss": 0.0, + "distillation_loss": 0.07148714363574982, + "epoch": 2.76, + "learning_rate": 3.3184021756144954e-05, + "loss": 0.0692, + "step": 2906, + "task_loss": 0.048625778406858444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7718945842558398, + "compression_loss": 0.0, + "distillation_loss": 0.053975023329257965, + "epoch": 2.76, + "learning_rate": 3.317395287789829e-05, + "loss": 0.0522, + "step": 2907, + "task_loss": 0.03605438768863678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7719098617056289, + "compression_loss": 0.0, + "distillation_loss": 0.21236878633499146, + "epoch": 2.76, + "learning_rate": 3.316388251481373e-05, + "loss": 0.2012, + "step": 2908, + "task_loss": 0.10068190097808838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771925133618104, + "compression_loss": 0.0, + "distillation_loss": 0.08693385124206543, + "epoch": 2.76, + "learning_rate": 3.3153810668720594e-05, + "loss": 0.0805, + "step": 2909, + "task_loss": 0.022921759635210037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7719403999942687, + "compression_loss": 0.0, + "distillation_loss": 0.07356773316860199, + "epoch": 2.76, + "learning_rate": 3.3143737341448475e-05, + "loss": 0.0745, + "step": 2910, + "task_loss": 0.08280356228351593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7719556608351267, + "compression_loss": 0.0, + "distillation_loss": 0.03629321604967117, + "epoch": 2.76, + "learning_rate": 3.3133662534827255e-05, + "loss": 0.034, + "step": 2911, + "task_loss": 0.01335129514336586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7719709161416815, + "compression_loss": 0.0, + "distillation_loss": 0.08336710929870605, + "epoch": 2.77, + "learning_rate": 3.3123586250687055e-05, + "loss": 0.078, + "step": 2912, + "task_loss": 0.02927432768046856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.771986165914937, + "compression_loss": 0.0, + "distillation_loss": 0.07923730462789536, + "epoch": 2.77, + "learning_rate": 3.311350849085829e-05, + "loss": 0.0754, + "step": 2913, + "task_loss": 0.04064404219388962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7720014101558967, + "compression_loss": 0.0, + "distillation_loss": 0.1845068484544754, + "epoch": 2.77, + "learning_rate": 3.3103429257171635e-05, + "loss": 0.1826, + "step": 2914, + "task_loss": 0.16558828949928284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7720166488655645, + "compression_loss": 0.0, + "distillation_loss": 0.03765151649713516, + "epoch": 2.77, + "learning_rate": 3.309334855145803e-05, + "loss": 0.043, + "step": 2915, + "task_loss": 0.09107490628957748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7720318820449439, + "compression_loss": 0.0, + "distillation_loss": 0.1513894945383072, + "epoch": 2.77, + "learning_rate": 3.3083266375548675e-05, + "loss": 0.1645, + "step": 2916, + "task_loss": 0.2826227843761444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7720471096950388, + "compression_loss": 0.0, + "distillation_loss": 0.10186254978179932, + "epoch": 2.77, + "learning_rate": 3.3073182731275064e-05, + "loss": 0.098, + "step": 2917, + "task_loss": 0.06284405291080475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7720623318168525, + "compression_loss": 0.0, + "distillation_loss": 0.18332624435424805, + "epoch": 2.77, + "learning_rate": 3.306309762046892e-05, + "loss": 0.1762, + "step": 2918, + "task_loss": 0.1121426373720169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7720775484113891, + "compression_loss": 0.0, + "distillation_loss": 0.0718914344906807, + "epoch": 2.77, + "learning_rate": 3.305301104496227e-05, + "loss": 0.0692, + "step": 2919, + "task_loss": 0.04449700936675072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772092759479652, + "compression_loss": 0.0, + "distillation_loss": 0.19091036915779114, + "epoch": 2.77, + "learning_rate": 3.3042923006587366e-05, + "loss": 0.1988, + "step": 2920, + "task_loss": 0.26946884393692017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772107965022645, + "compression_loss": 0.0, + "distillation_loss": 0.05884380638599396, + "epoch": 2.77, + "learning_rate": 3.303283350717678e-05, + "loss": 0.0645, + "step": 2921, + "task_loss": 0.11569136381149292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7721231650413717, + "compression_loss": 0.0, + "distillation_loss": 0.06322423368692398, + "epoch": 2.77, + "learning_rate": 3.302274254856329e-05, + "loss": 0.06, + "step": 2922, + "task_loss": 0.031089693307876587 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772138359536836, + "compression_loss": 0.0, + "distillation_loss": 0.13263660669326782, + "epoch": 2.78, + "learning_rate": 3.301265013257998e-05, + "loss": 0.1316, + "step": 2923, + "task_loss": 0.12230206280946732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7721535485100413, + "compression_loss": 0.0, + "distillation_loss": 0.1299370527267456, + "epoch": 2.78, + "learning_rate": 3.300255626106019e-05, + "loss": 0.1243, + "step": 2924, + "task_loss": 0.07319293916225433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7721687319619914, + "compression_loss": 0.0, + "distillation_loss": 0.024539334699511528, + "epoch": 2.78, + "learning_rate": 3.2992460935837505e-05, + "loss": 0.0226, + "step": 2925, + "task_loss": 0.005514673888683319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.77218390989369, + "compression_loss": 0.0, + "distillation_loss": 0.14706680178642273, + "epoch": 2.78, + "learning_rate": 3.2982364158745805e-05, + "loss": 0.1399, + "step": 2926, + "task_loss": 0.07497966289520264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7721990823061408, + "compression_loss": 0.0, + "distillation_loss": 0.07125681638717651, + "epoch": 2.78, + "learning_rate": 3.297226593161921e-05, + "loss": 0.0669, + "step": 2927, + "task_loss": 0.027698028832674026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7722142492003474, + "compression_loss": 0.0, + "distillation_loss": 0.25317683815956116, + "epoch": 2.78, + "learning_rate": 3.2962166256292113e-05, + "loss": 0.2522, + "step": 2928, + "task_loss": 0.24299074709415436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7722294105773136, + "compression_loss": 0.0, + "distillation_loss": 0.10056150704622269, + "epoch": 2.78, + "learning_rate": 3.295206513459917e-05, + "loss": 0.0971, + "step": 2929, + "task_loss": 0.06555324792861938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772244566438043, + "compression_loss": 0.0, + "distillation_loss": 0.10626856982707977, + "epoch": 2.78, + "learning_rate": 3.2941962568375296e-05, + "loss": 0.1043, + "step": 2930, + "task_loss": 0.08672097325325012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7722597167835393, + "compression_loss": 0.0, + "distillation_loss": 0.02176981046795845, + "epoch": 2.78, + "learning_rate": 3.2931858559455674e-05, + "loss": 0.043, + "step": 2931, + "task_loss": 0.23452335596084595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7722748616148062, + "compression_loss": 0.0, + "distillation_loss": 0.26498720049858093, + "epoch": 2.78, + "learning_rate": 3.292175310967575e-05, + "loss": 0.2554, + "step": 2932, + "task_loss": 0.16884511709213257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7722900009328473, + "compression_loss": 0.0, + "distillation_loss": 0.020644748583436012, + "epoch": 2.79, + "learning_rate": 3.291164622087122e-05, + "loss": 0.0191, + "step": 2933, + "task_loss": 0.005039013922214508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723051347386664, + "compression_loss": 0.0, + "distillation_loss": 0.02696690335869789, + "epoch": 2.79, + "learning_rate": 3.290153789487804e-05, + "loss": 0.0264, + "step": 2934, + "task_loss": 0.02141895517706871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723202630332671, + "compression_loss": 0.0, + "distillation_loss": 0.056153517216444016, + "epoch": 2.79, + "learning_rate": 3.289142813353246e-05, + "loss": 0.0626, + "step": 2935, + "task_loss": 0.12072312086820602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723353858176532, + "compression_loss": 0.0, + "distillation_loss": 0.09314046055078506, + "epoch": 2.79, + "learning_rate": 3.2881316938670945e-05, + "loss": 0.1048, + "step": 2936, + "task_loss": 0.20927327871322632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723505030928282, + "compression_loss": 0.0, + "distillation_loss": 0.02801085263490677, + "epoch": 2.79, + "learning_rate": 3.2871204312130254e-05, + "loss": 0.0387, + "step": 2937, + "task_loss": 0.13507309556007385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723656148597958, + "compression_loss": 0.0, + "distillation_loss": 0.0866575613617897, + "epoch": 2.79, + "learning_rate": 3.28610902557474e-05, + "loss": 0.0907, + "step": 2938, + "task_loss": 0.12749417126178741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723807211195599, + "compression_loss": 0.0, + "distillation_loss": 0.11423015594482422, + "epoch": 2.79, + "learning_rate": 3.285097477135963e-05, + "loss": 0.1059, + "step": 2939, + "task_loss": 0.030799927189946175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7723958218731239, + "compression_loss": 0.0, + "distillation_loss": 0.10484655201435089, + "epoch": 2.79, + "learning_rate": 3.284085786080449e-05, + "loss": 0.1022, + "step": 2940, + "task_loss": 0.07836197316646576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7724109171214917, + "compression_loss": 0.0, + "distillation_loss": 0.03331822156906128, + "epoch": 2.79, + "learning_rate": 3.283073952591975e-05, + "loss": 0.0453, + "step": 2941, + "task_loss": 0.1528467983007431 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772426006865667, + "compression_loss": 0.0, + "distillation_loss": 0.15390589833259583, + "epoch": 2.79, + "learning_rate": 3.2820619768543473e-05, + "loss": 0.1486, + "step": 2942, + "task_loss": 0.10106901824474335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7724410911066533, + "compression_loss": 0.0, + "distillation_loss": 0.02205563150346279, + "epoch": 2.79, + "learning_rate": 3.281049859051394e-05, + "loss": 0.0202, + "step": 2943, + "task_loss": 0.003351377323269844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7724561698454543, + "compression_loss": 0.0, + "distillation_loss": 0.04081364721059799, + "epoch": 2.8, + "learning_rate": 3.280037599366972e-05, + "loss": 0.039, + "step": 2944, + "task_loss": 0.022568656131625175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7724712430830738, + "compression_loss": 0.0, + "distillation_loss": 0.10027671605348587, + "epoch": 2.8, + "learning_rate": 3.2790251979849654e-05, + "loss": 0.0971, + "step": 2945, + "task_loss": 0.06809482723474503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7724863108205153, + "compression_loss": 0.0, + "distillation_loss": 0.11592331528663635, + "epoch": 2.8, + "learning_rate": 3.278012655089277e-05, + "loss": 0.1258, + "step": 2946, + "task_loss": 0.21512287855148315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725013730587829, + "compression_loss": 0.0, + "distillation_loss": 0.11402192711830139, + "epoch": 2.8, + "learning_rate": 3.276999970863845e-05, + "loss": 0.1178, + "step": 2947, + "task_loss": 0.1516398787498474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725164297988798, + "compression_loss": 0.0, + "distillation_loss": 0.039229217916727066, + "epoch": 2.8, + "learning_rate": 3.275987145492625e-05, + "loss": 0.0364, + "step": 2948, + "task_loss": 0.010563917458057404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725314810418099, + "compression_loss": 0.0, + "distillation_loss": 0.179963618516922, + "epoch": 2.8, + "learning_rate": 3.274974179159603e-05, + "loss": 0.181, + "step": 2949, + "task_loss": 0.1902376413345337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725465267885768, + "compression_loss": 0.0, + "distillation_loss": 0.08978709578514099, + "epoch": 2.8, + "learning_rate": 3.27396107204879e-05, + "loss": 0.0879, + "step": 2950, + "task_loss": 0.07117792963981628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725615670401843, + "compression_loss": 0.0, + "distillation_loss": 0.15075881779193878, + "epoch": 2.8, + "learning_rate": 3.2729478243442194e-05, + "loss": 0.1489, + "step": 2951, + "task_loss": 0.13251779973506927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725766017976361, + "compression_loss": 0.0, + "distillation_loss": 0.07806205749511719, + "epoch": 2.8, + "learning_rate": 3.271934436229955e-05, + "loss": 0.0817, + "step": 2952, + "task_loss": 0.11416235566139221 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7725916310619357, + "compression_loss": 0.0, + "distillation_loss": 0.23699919879436493, + "epoch": 2.8, + "learning_rate": 3.270920907890082e-05, + "loss": 0.2343, + "step": 2953, + "task_loss": 0.21003299951553345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772606654834087, + "compression_loss": 0.0, + "distillation_loss": 0.09724032133817673, + "epoch": 2.81, + "learning_rate": 3.269907239508714e-05, + "loss": 0.0913, + "step": 2954, + "task_loss": 0.037794340401887894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7726216731150934, + "compression_loss": 0.0, + "distillation_loss": 0.11641349643468857, + "epoch": 2.81, + "learning_rate": 3.268893431269987e-05, + "loss": 0.1332, + "step": 2955, + "task_loss": 0.2846828103065491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7726366859059589, + "compression_loss": 0.0, + "distillation_loss": 0.13409125804901123, + "epoch": 2.81, + "learning_rate": 3.2678794833580654e-05, + "loss": 0.1328, + "step": 2956, + "task_loss": 0.1211758628487587 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7726516932076869, + "compression_loss": 0.0, + "distillation_loss": 0.1196846067905426, + "epoch": 2.81, + "learning_rate": 3.2668653959571384e-05, + "loss": 0.1139, + "step": 2957, + "task_loss": 0.06228470057249069 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7726666950212814, + "compression_loss": 0.0, + "distillation_loss": 0.12382517009973526, + "epoch": 2.81, + "learning_rate": 3.2658511692514184e-05, + "loss": 0.1171, + "step": 2958, + "task_loss": 0.056750208139419556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7726816913477457, + "compression_loss": 0.0, + "distillation_loss": 0.07181763648986816, + "epoch": 2.81, + "learning_rate": 3.2648368034251454e-05, + "loss": 0.073, + "step": 2959, + "task_loss": 0.0838204026222229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7726966821880837, + "compression_loss": 0.0, + "distillation_loss": 0.19776178896427155, + "epoch": 2.81, + "learning_rate": 3.263822298662583e-05, + "loss": 0.1927, + "step": 2960, + "task_loss": 0.14683988690376282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7727116675432992, + "compression_loss": 0.0, + "distillation_loss": 0.1264166533946991, + "epoch": 2.81, + "learning_rate": 3.2628076551480216e-05, + "loss": 0.1285, + "step": 2961, + "task_loss": 0.14740031957626343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7727266474143957, + "compression_loss": 0.0, + "distillation_loss": 0.18037302792072296, + "epoch": 2.81, + "learning_rate": 3.2617928730657764e-05, + "loss": 0.189, + "step": 2962, + "task_loss": 0.2670821249485016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7727416218023769, + "compression_loss": 0.0, + "distillation_loss": 0.02856997214257717, + "epoch": 2.81, + "learning_rate": 3.260777952600186e-05, + "loss": 0.0308, + "step": 2963, + "task_loss": 0.051173169165849686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7727565907082465, + "compression_loss": 0.0, + "distillation_loss": 0.20055626332759857, + "epoch": 2.81, + "learning_rate": 3.2597628939356175e-05, + "loss": 0.1923, + "step": 2964, + "task_loss": 0.11836535483598709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7727715541330082, + "compression_loss": 0.0, + "distillation_loss": 0.10277973115444183, + "epoch": 2.82, + "learning_rate": 3.25874769725646e-05, + "loss": 0.1018, + "step": 2965, + "task_loss": 0.09281755983829498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7727865120776657, + "compression_loss": 0.0, + "distillation_loss": 0.07278560847043991, + "epoch": 2.82, + "learning_rate": 3.257732362747129e-05, + "loss": 0.0731, + "step": 2966, + "task_loss": 0.07589083909988403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7728014645432226, + "compression_loss": 0.0, + "distillation_loss": 0.05969984084367752, + "epoch": 2.82, + "learning_rate": 3.256716890592065e-05, + "loss": 0.0624, + "step": 2967, + "task_loss": 0.08681820333003998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7728164115306826, + "compression_loss": 0.0, + "distillation_loss": 0.1937519758939743, + "epoch": 2.82, + "learning_rate": 3.255701280975733e-05, + "loss": 0.1939, + "step": 2968, + "task_loss": 0.19502577185630798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7728313530410496, + "compression_loss": 0.0, + "distillation_loss": 0.14739632606506348, + "epoch": 2.82, + "learning_rate": 3.2546855340826246e-05, + "loss": 0.1487, + "step": 2969, + "task_loss": 0.1602475941181183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7728462890753269, + "compression_loss": 0.0, + "distillation_loss": 0.16489112377166748, + "epoch": 2.82, + "learning_rate": 3.253669650097254e-05, + "loss": 0.1558, + "step": 2970, + "task_loss": 0.07406702637672424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7728612196345185, + "compression_loss": 0.0, + "distillation_loss": 0.10662008821964264, + "epoch": 2.82, + "learning_rate": 3.2526536292041625e-05, + "loss": 0.1111, + "step": 2971, + "task_loss": 0.1511189192533493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772876144719628, + "compression_loss": 0.0, + "distillation_loss": 0.0431804321706295, + "epoch": 2.82, + "learning_rate": 3.2516374715879126e-05, + "loss": 0.0418, + "step": 2972, + "task_loss": 0.02895108424127102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7728910643316589, + "compression_loss": 0.0, + "distillation_loss": 0.1767425686120987, + "epoch": 2.82, + "learning_rate": 3.250621177433097e-05, + "loss": 0.1788, + "step": 2973, + "task_loss": 0.197010338306427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.772905978471615, + "compression_loss": 0.0, + "distillation_loss": 0.12274418771266937, + "epoch": 2.82, + "learning_rate": 3.249604746924331e-05, + "loss": 0.1189, + "step": 2974, + "task_loss": 0.08417470753192902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7729208871405002, + "compression_loss": 0.0, + "distillation_loss": 0.023153632879257202, + "epoch": 2.83, + "learning_rate": 3.248588180246251e-05, + "loss": 0.0321, + "step": 2975, + "task_loss": 0.11221934109926224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7729357903393179, + "compression_loss": 0.0, + "distillation_loss": 0.24052870273590088, + "epoch": 2.83, + "learning_rate": 3.247571477583523e-05, + "loss": 0.248, + "step": 2976, + "task_loss": 0.31512880325317383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7729506880690719, + "compression_loss": 0.0, + "distillation_loss": 0.11706255376338959, + "epoch": 2.83, + "learning_rate": 3.2465546391208355e-05, + "loss": 0.1414, + "step": 2977, + "task_loss": 0.36023640632629395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7729655803307658, + "compression_loss": 0.0, + "distillation_loss": 0.24018515646457672, + "epoch": 2.83, + "learning_rate": 3.245537665042903e-05, + "loss": 0.2329, + "step": 2978, + "task_loss": 0.1674792766571045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7729804671254034, + "compression_loss": 0.0, + "distillation_loss": 0.12150129675865173, + "epoch": 2.83, + "learning_rate": 3.244520555534463e-05, + "loss": 0.1159, + "step": 2979, + "task_loss": 0.06535904854536057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7729953484539883, + "compression_loss": 0.0, + "distillation_loss": 0.09363529086112976, + "epoch": 2.83, + "learning_rate": 3.243503310780278e-05, + "loss": 0.0944, + "step": 2980, + "task_loss": 0.10123631358146667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730102243175242, + "compression_loss": 0.0, + "distillation_loss": 0.060428209602832794, + "epoch": 2.83, + "learning_rate": 3.242485930965136e-05, + "loss": 0.0578, + "step": 2981, + "task_loss": 0.03405189514160156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730250947170147, + "compression_loss": 0.0, + "distillation_loss": 0.03489404916763306, + "epoch": 2.83, + "learning_rate": 3.241468416273849e-05, + "loss": 0.0514, + "step": 2982, + "task_loss": 0.2001369595527649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730399596534637, + "compression_loss": 0.0, + "distillation_loss": 0.026621265336871147, + "epoch": 2.83, + "learning_rate": 3.2404507668912534e-05, + "loss": 0.0257, + "step": 2983, + "task_loss": 0.017897440120577812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730548191278748, + "compression_loss": 0.0, + "distillation_loss": 0.06251438707113266, + "epoch": 2.83, + "learning_rate": 3.2394329830022095e-05, + "loss": 0.0658, + "step": 2984, + "task_loss": 0.0951443687081337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730696731412514, + "compression_loss": 0.0, + "distillation_loss": 0.06263174116611481, + "epoch": 2.83, + "learning_rate": 3.238415064791603e-05, + "loss": 0.0604, + "step": 2985, + "task_loss": 0.03981752693653107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730845216945976, + "compression_loss": 0.0, + "distillation_loss": 0.04605867341160774, + "epoch": 2.84, + "learning_rate": 3.237397012444344e-05, + "loss": 0.056, + "step": 2986, + "task_loss": 0.145818829536438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7730993647889168, + "compression_loss": 0.0, + "distillation_loss": 0.08241648226976395, + "epoch": 2.84, + "learning_rate": 3.2363788261453664e-05, + "loss": 0.0888, + "step": 2987, + "task_loss": 0.14630846679210663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7731142024252129, + "compression_loss": 0.0, + "distillation_loss": 0.027548307552933693, + "epoch": 2.84, + "learning_rate": 3.2353605060796286e-05, + "loss": 0.0253, + "step": 2988, + "task_loss": 0.004967987537384033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7731290346044893, + "compression_loss": 0.0, + "distillation_loss": 0.17513786256313324, + "epoch": 2.84, + "learning_rate": 3.2343420524321134e-05, + "loss": 0.1683, + "step": 2989, + "task_loss": 0.1069210097193718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.77314386132775, + "compression_loss": 0.0, + "distillation_loss": 0.0309540294110775, + "epoch": 2.84, + "learning_rate": 3.2333234653878275e-05, + "loss": 0.033, + "step": 2990, + "task_loss": 0.051752254366874695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7731586825959984, + "compression_loss": 0.0, + "distillation_loss": 0.07692794501781464, + "epoch": 2.84, + "learning_rate": 3.2323047451318023e-05, + "loss": 0.0828, + "step": 2991, + "task_loss": 0.1354198455810547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7731734984102384, + "compression_loss": 0.0, + "distillation_loss": 0.23258818686008453, + "epoch": 2.84, + "learning_rate": 3.2312858918490936e-05, + "loss": 0.2235, + "step": 2992, + "task_loss": 0.1417001634836197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7731883087714736, + "compression_loss": 0.0, + "distillation_loss": 0.07296988368034363, + "epoch": 2.84, + "learning_rate": 3.2302669057247806e-05, + "loss": 0.0679, + "step": 2993, + "task_loss": 0.02189962938427925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7732031136807076, + "compression_loss": 0.0, + "distillation_loss": 0.13466548919677734, + "epoch": 2.84, + "learning_rate": 3.2292477869439666e-05, + "loss": 0.1294, + "step": 2994, + "task_loss": 0.08172232657670975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7732179131389442, + "compression_loss": 0.0, + "distillation_loss": 0.1007700264453888, + "epoch": 2.84, + "learning_rate": 3.228228535691781e-05, + "loss": 0.0913, + "step": 2995, + "task_loss": 0.0059202127158641815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.773232707147187, + "compression_loss": 0.0, + "distillation_loss": 0.13270485401153564, + "epoch": 2.85, + "learning_rate": 3.2272091521533745e-05, + "loss": 0.1522, + "step": 2996, + "task_loss": 0.3279738426208496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7732474957064397, + "compression_loss": 0.0, + "distillation_loss": 0.09864410758018494, + "epoch": 2.85, + "learning_rate": 3.226189636513923e-05, + "loss": 0.0946, + "step": 2997, + "task_loss": 0.05842628702521324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.773262278817706, + "compression_loss": 0.0, + "distillation_loss": 0.14679734408855438, + "epoch": 2.85, + "learning_rate": 3.225169988958627e-05, + "loss": 0.1396, + "step": 2998, + "task_loss": 0.07466141879558563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7732770564819896, + "compression_loss": 0.0, + "distillation_loss": 0.09835858643054962, + "epoch": 2.85, + "learning_rate": 3.2241502096727095e-05, + "loss": 0.0922, + "step": 2999, + "task_loss": 0.03713885694742203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7732918287002941, + "compression_loss": 0.0, + "distillation_loss": 0.08745696395635605, + "epoch": 2.85, + "learning_rate": 3.2231302988414194e-05, + "loss": 0.0818, + "step": 3000, + "task_loss": 0.030404910445213318 + }, + { + "epoch": 2.85, + "eval_accuracy": 0.8899082568807339, + "eval_loss": 0.41417789459228516, + "eval_runtime": 17.9806, + "eval_samples_per_second": 48.497, + "eval_steps_per_second": 6.062, + "step": 3000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733065954736233, + "compression_loss": 0.0, + "distillation_loss": 0.16211673617362976, + "epoch": 2.85, + "learning_rate": 3.222110256650028e-05, + "loss": 0.1516, + "step": 3001, + "task_loss": 0.05707669258117676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733213568029809, + "compression_loss": 0.0, + "distillation_loss": 0.10503898561000824, + "epoch": 2.85, + "learning_rate": 3.2210900832838295e-05, + "loss": 0.1082, + "step": 3002, + "task_loss": 0.13650718331336975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733361126893704, + "compression_loss": 0.0, + "distillation_loss": 0.037988051772117615, + "epoch": 2.85, + "learning_rate": 3.220069778928146e-05, + "loss": 0.0421, + "step": 3003, + "task_loss": 0.0787007063627243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733508631337956, + "compression_loss": 0.0, + "distillation_loss": 0.10283227264881134, + "epoch": 2.85, + "learning_rate": 3.2190493437683185e-05, + "loss": 0.1037, + "step": 3004, + "task_loss": 0.11105208098888397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733656081372603, + "compression_loss": 0.0, + "distillation_loss": 0.1534404307603836, + "epoch": 2.85, + "learning_rate": 3.2180287779897155e-05, + "loss": 0.1409, + "step": 3005, + "task_loss": 0.028170330449938774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733803477007679, + "compression_loss": 0.0, + "distillation_loss": 0.03912707045674324, + "epoch": 2.85, + "learning_rate": 3.217008081777726e-05, + "loss": 0.0443, + "step": 3006, + "task_loss": 0.09075871109962463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7733950818253224, + "compression_loss": 0.0, + "distillation_loss": 0.08084475994110107, + "epoch": 2.86, + "learning_rate": 3.2159872553177655e-05, + "loss": 0.0756, + "step": 3007, + "task_loss": 0.028212103992700577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734098105119271, + "compression_loss": 0.0, + "distillation_loss": 0.09292879700660706, + "epoch": 2.86, + "learning_rate": 3.2149662987952725e-05, + "loss": 0.0965, + "step": 3008, + "task_loss": 0.12906073033809662 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734245337615862, + "compression_loss": 0.0, + "distillation_loss": 0.04876542091369629, + "epoch": 2.86, + "learning_rate": 3.213945212395707e-05, + "loss": 0.0478, + "step": 3009, + "task_loss": 0.039374131709337234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734392515753028, + "compression_loss": 0.0, + "distillation_loss": 0.061346620321273804, + "epoch": 2.86, + "learning_rate": 3.212923996304556e-05, + "loss": 0.0648, + "step": 3010, + "task_loss": 0.09587064385414124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734539639540811, + "compression_loss": 0.0, + "distillation_loss": 0.18387816846370697, + "epoch": 2.86, + "learning_rate": 3.211902650707327e-05, + "loss": 0.1763, + "step": 3011, + "task_loss": 0.10823600739240646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734686708989243, + "compression_loss": 0.0, + "distillation_loss": 0.08546897023916245, + "epoch": 2.86, + "learning_rate": 3.210881175789553e-05, + "loss": 0.0837, + "step": 3012, + "task_loss": 0.06747668981552124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734833724108365, + "compression_loss": 0.0, + "distillation_loss": 0.26206955313682556, + "epoch": 2.86, + "learning_rate": 3.209859571736791e-05, + "loss": 0.2582, + "step": 3013, + "task_loss": 0.2230200469493866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7734980684908211, + "compression_loss": 0.0, + "distillation_loss": 0.02551981247961521, + "epoch": 2.86, + "learning_rate": 3.208837838734618e-05, + "loss": 0.0314, + "step": 3014, + "task_loss": 0.08437542617321014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.773512759139882, + "compression_loss": 0.0, + "distillation_loss": 0.11942901462316513, + "epoch": 2.86, + "learning_rate": 3.207815976968638e-05, + "loss": 0.1173, + "step": 3015, + "task_loss": 0.0981997698545456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7735274443590228, + "compression_loss": 0.0, + "distillation_loss": 0.03865540400147438, + "epoch": 2.86, + "learning_rate": 3.2067939866244764e-05, + "loss": 0.0452, + "step": 3016, + "task_loss": 0.10382010787725449 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7735421241492471, + "compression_loss": 0.0, + "distillation_loss": 0.1100415512919426, + "epoch": 2.87, + "learning_rate": 3.205771867887784e-05, + "loss": 0.113, + "step": 3017, + "task_loss": 0.13981683552265167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7735567985115586, + "compression_loss": 0.0, + "distillation_loss": 0.19163449108600616, + "epoch": 2.87, + "learning_rate": 3.204749620944232e-05, + "loss": 0.1836, + "step": 3018, + "task_loss": 0.11117150634527206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.773571467446961, + "compression_loss": 0.0, + "distillation_loss": 0.042591970413923264, + "epoch": 2.87, + "learning_rate": 3.203727245979518e-05, + "loss": 0.0501, + "step": 3019, + "task_loss": 0.11770500242710114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7735861309564581, + "compression_loss": 0.0, + "distillation_loss": 0.17520135641098022, + "epoch": 2.87, + "learning_rate": 3.20270474317936e-05, + "loss": 0.173, + "step": 3020, + "task_loss": 0.15289539098739624 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736007890410534, + "compression_loss": 0.0, + "distillation_loss": 0.058556366711854935, + "epoch": 2.87, + "learning_rate": 3.201682112729502e-05, + "loss": 0.0701, + "step": 3021, + "task_loss": 0.17429102957248688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736154417017508, + "compression_loss": 0.0, + "distillation_loss": 0.12820349633693695, + "epoch": 2.87, + "learning_rate": 3.2006593548157074e-05, + "loss": 0.1274, + "step": 3022, + "task_loss": 0.11984600871801376 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736300889395538, + "compression_loss": 0.0, + "distillation_loss": 0.12782520055770874, + "epoch": 2.87, + "learning_rate": 3.1996364696237676e-05, + "loss": 0.1207, + "step": 3023, + "task_loss": 0.05618831515312195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736447307554661, + "compression_loss": 0.0, + "distillation_loss": 0.073527991771698, + "epoch": 2.87, + "learning_rate": 3.198613457339493e-05, + "loss": 0.0798, + "step": 3024, + "task_loss": 0.13668204843997955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736593671504914, + "compression_loss": 0.0, + "distillation_loss": 0.017919428646564484, + "epoch": 2.87, + "learning_rate": 3.19759031814872e-05, + "loss": 0.0165, + "step": 3025, + "task_loss": 0.0033756643533706665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736739981256334, + "compression_loss": 0.0, + "distillation_loss": 0.03155189007520676, + "epoch": 2.87, + "learning_rate": 3.196567052237306e-05, + "loss": 0.0288, + "step": 3026, + "task_loss": 0.003785140812397003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7736886236818958, + "compression_loss": 0.0, + "distillation_loss": 0.10638611018657684, + "epoch": 2.87, + "learning_rate": 3.195543659791132e-05, + "loss": 0.1069, + "step": 3027, + "task_loss": 0.11161436140537262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737032438202823, + "compression_loss": 0.0, + "distillation_loss": 0.19940927624702454, + "epoch": 2.88, + "learning_rate": 3.194520140996102e-05, + "loss": 0.1983, + "step": 3028, + "task_loss": 0.1886705756187439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737178585417965, + "compression_loss": 0.0, + "distillation_loss": 0.0578513965010643, + "epoch": 2.88, + "learning_rate": 3.193496496038144e-05, + "loss": 0.0666, + "step": 3029, + "task_loss": 0.1448466032743454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737324678474422, + "compression_loss": 0.0, + "distillation_loss": 0.18462207913398743, + "epoch": 2.88, + "learning_rate": 3.1924727251032075e-05, + "loss": 0.1751, + "step": 3030, + "task_loss": 0.08917995542287827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737470717382229, + "compression_loss": 0.0, + "distillation_loss": 0.0979708880186081, + "epoch": 2.88, + "learning_rate": 3.191448828377267e-05, + "loss": 0.1038, + "step": 3031, + "task_loss": 0.15621528029441833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737616702151424, + "compression_loss": 0.0, + "distillation_loss": 0.06346909701824188, + "epoch": 2.88, + "learning_rate": 3.1904248060463146e-05, + "loss": 0.0632, + "step": 3032, + "task_loss": 0.06106296554207802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737762632792045, + "compression_loss": 0.0, + "distillation_loss": 0.08692383021116257, + "epoch": 2.88, + "learning_rate": 3.189400658296372e-05, + "loss": 0.0864, + "step": 3033, + "task_loss": 0.08201731741428375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7737908509314126, + "compression_loss": 0.0, + "distillation_loss": 0.06603842228651047, + "epoch": 2.88, + "learning_rate": 3.188376385313479e-05, + "loss": 0.0611, + "step": 3034, + "task_loss": 0.016200372949242592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7738054331727706, + "compression_loss": 0.0, + "distillation_loss": 0.1842041015625, + "epoch": 2.88, + "learning_rate": 3.187351987283701e-05, + "loss": 0.1758, + "step": 3035, + "task_loss": 0.10015082359313965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7738200100042821, + "compression_loss": 0.0, + "distillation_loss": 0.1197400689125061, + "epoch": 2.88, + "learning_rate": 3.1863274643931244e-05, + "loss": 0.1273, + "step": 3036, + "task_loss": 0.19555895030498505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7738345814269508, + "compression_loss": 0.0, + "distillation_loss": 0.24975571036338806, + "epoch": 2.88, + "learning_rate": 3.185302816827858e-05, + "loss": 0.242, + "step": 3037, + "task_loss": 0.17197510600090027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7738491474417805, + "compression_loss": 0.0, + "distillation_loss": 0.0718345120549202, + "epoch": 2.89, + "learning_rate": 3.184278044774035e-05, + "loss": 0.0706, + "step": 3038, + "task_loss": 0.0589919239282608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7738637080497746, + "compression_loss": 0.0, + "distillation_loss": 0.17170128226280212, + "epoch": 2.89, + "learning_rate": 3.183253148417808e-05, + "loss": 0.1797, + "step": 3039, + "task_loss": 0.25216758251190186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.773878263251937, + "compression_loss": 0.0, + "distillation_loss": 0.17050856351852417, + "epoch": 2.89, + "learning_rate": 3.182228127945358e-05, + "loss": 0.1782, + "step": 3040, + "task_loss": 0.2472606599330902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7738928130492714, + "compression_loss": 0.0, + "distillation_loss": 0.09137849509716034, + "epoch": 2.89, + "learning_rate": 3.1812029835428825e-05, + "loss": 0.0876, + "step": 3041, + "task_loss": 0.053437259048223495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7739073574427813, + "compression_loss": 0.0, + "distillation_loss": 0.08643493056297302, + "epoch": 2.89, + "learning_rate": 3.1801777153966034e-05, + "loss": 0.1166, + "step": 3042, + "task_loss": 0.3879551291465759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7739218964334705, + "compression_loss": 0.0, + "distillation_loss": 0.06834743916988373, + "epoch": 2.89, + "learning_rate": 3.179152323692767e-05, + "loss": 0.0775, + "step": 3043, + "task_loss": 0.16033974289894104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7739364300223428, + "compression_loss": 0.0, + "distillation_loss": 0.07843412458896637, + "epoch": 2.89, + "learning_rate": 3.1781268086176406e-05, + "loss": 0.0788, + "step": 3044, + "task_loss": 0.08182865381240845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7739509582104016, + "compression_loss": 0.0, + "distillation_loss": 0.09358078241348267, + "epoch": 2.89, + "learning_rate": 3.177101170357513e-05, + "loss": 0.0883, + "step": 3045, + "task_loss": 0.04088345542550087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7739654809986508, + "compression_loss": 0.0, + "distillation_loss": 0.030289195477962494, + "epoch": 2.89, + "learning_rate": 3.1760754090986975e-05, + "loss": 0.028, + "step": 3046, + "task_loss": 0.007276715710759163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7739799983880941, + "compression_loss": 0.0, + "distillation_loss": 0.06697987020015717, + "epoch": 2.89, + "learning_rate": 3.175049525027527e-05, + "loss": 0.0772, + "step": 3047, + "task_loss": 0.16944551467895508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.773994510379735, + "compression_loss": 0.0, + "distillation_loss": 0.09630005806684494, + "epoch": 2.89, + "learning_rate": 3.17402351833036e-05, + "loss": 0.0926, + "step": 3048, + "task_loss": 0.059294380247592926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740090169745774, + "compression_loss": 0.0, + "distillation_loss": 0.04504215717315674, + "epoch": 2.9, + "learning_rate": 3.1729973891935745e-05, + "loss": 0.0579, + "step": 3049, + "task_loss": 0.17366653680801392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740235181736247, + "compression_loss": 0.0, + "distillation_loss": 0.024151597172021866, + "epoch": 2.9, + "learning_rate": 3.1719711378035714e-05, + "loss": 0.0223, + "step": 3050, + "task_loss": 0.005144516006112099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740380139778809, + "compression_loss": 0.0, + "distillation_loss": 0.07286585122346878, + "epoch": 2.9, + "learning_rate": 3.1709447643467755e-05, + "loss": 0.0733, + "step": 3051, + "task_loss": 0.07735887914896011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740525043883495, + "compression_loss": 0.0, + "distillation_loss": 0.12737995386123657, + "epoch": 2.9, + "learning_rate": 3.1699182690096316e-05, + "loss": 0.1227, + "step": 3052, + "task_loss": 0.08107961714267731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740669894060341, + "compression_loss": 0.0, + "distillation_loss": 0.13144919276237488, + "epoch": 2.9, + "learning_rate": 3.168891651978609e-05, + "loss": 0.1325, + "step": 3053, + "task_loss": 0.14158479869365692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740814690319386, + "compression_loss": 0.0, + "distillation_loss": 0.04931913688778877, + "epoch": 2.9, + "learning_rate": 3.167864913440195e-05, + "loss": 0.0455, + "step": 3054, + "task_loss": 0.011026356369256973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7740959432670665, + "compression_loss": 0.0, + "distillation_loss": 0.032145917415618896, + "epoch": 2.9, + "learning_rate": 3.1668380535809036e-05, + "loss": 0.0341, + "step": 3055, + "task_loss": 0.05174173414707184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7741104121124217, + "compression_loss": 0.0, + "distillation_loss": 0.04694236442446709, + "epoch": 2.9, + "learning_rate": 3.165811072587267e-05, + "loss": 0.0486, + "step": 3056, + "task_loss": 0.06309117376804352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7741248755690077, + "compression_loss": 0.0, + "distillation_loss": 0.13567635416984558, + "epoch": 2.9, + "learning_rate": 3.164783970645845e-05, + "loss": 0.1325, + "step": 3057, + "task_loss": 0.10343907028436661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7741393336378282, + "compression_loss": 0.0, + "distillation_loss": 0.11043036729097366, + "epoch": 2.9, + "learning_rate": 3.1637567479432113e-05, + "loss": 0.1153, + "step": 3058, + "task_loss": 0.15953701734542847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7741537863198868, + "compression_loss": 0.0, + "distillation_loss": 0.06733449548482895, + "epoch": 2.91, + "learning_rate": 3.1627294046659675e-05, + "loss": 0.0654, + "step": 3059, + "task_loss": 0.04814068600535393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7741682336161875, + "compression_loss": 0.0, + "distillation_loss": 0.030272886157035828, + "epoch": 2.91, + "learning_rate": 3.1617019410007366e-05, + "loss": 0.0279, + "step": 3060, + "task_loss": 0.006075674667954445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7741826755277337, + "compression_loss": 0.0, + "distillation_loss": 0.16811983287334442, + "epoch": 2.91, + "learning_rate": 3.16067435713416e-05, + "loss": 0.1535, + "step": 3061, + "task_loss": 0.021725336089730263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.774197112055529, + "compression_loss": 0.0, + "distillation_loss": 0.024632154032588005, + "epoch": 2.91, + "learning_rate": 3.159646653252906e-05, + "loss": 0.0229, + "step": 3062, + "task_loss": 0.007006222382187843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7742115432005774, + "compression_loss": 0.0, + "distillation_loss": 0.19798026978969574, + "epoch": 2.91, + "learning_rate": 3.1586188295436594e-05, + "loss": 0.1975, + "step": 3063, + "task_loss": 0.19281329214572906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7742259689638823, + "compression_loss": 0.0, + "distillation_loss": 0.08180099725723267, + "epoch": 2.91, + "learning_rate": 3.1575908861931314e-05, + "loss": 0.0904, + "step": 3064, + "task_loss": 0.16796118021011353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7742403893464476, + "compression_loss": 0.0, + "distillation_loss": 0.0645010769367218, + "epoch": 2.91, + "learning_rate": 3.156562823388051e-05, + "loss": 0.0614, + "step": 3065, + "task_loss": 0.03324957937002182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7742548043492768, + "compression_loss": 0.0, + "distillation_loss": 0.10893145948648453, + "epoch": 2.91, + "learning_rate": 3.155534641315172e-05, + "loss": 0.1009, + "step": 3066, + "task_loss": 0.028400206938385963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7742692139733738, + "compression_loss": 0.0, + "distillation_loss": 0.032349854707717896, + "epoch": 2.91, + "learning_rate": 3.154506340161269e-05, + "loss": 0.0305, + "step": 3067, + "task_loss": 0.013784002512693405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.774283618219742, + "compression_loss": 0.0, + "distillation_loss": 0.19412454962730408, + "epoch": 2.91, + "learning_rate": 3.1534779201131366e-05, + "loss": 0.1937, + "step": 3068, + "task_loss": 0.1897927224636078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7742980170893853, + "compression_loss": 0.0, + "distillation_loss": 0.02269315905869007, + "epoch": 2.91, + "learning_rate": 3.152449381357593e-05, + "loss": 0.021, + "step": 3069, + "task_loss": 0.005848288536071777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7743124105833072, + "compression_loss": 0.0, + "distillation_loss": 0.17387601733207703, + "epoch": 2.92, + "learning_rate": 3.151420724081478e-05, + "loss": 0.1707, + "step": 3070, + "task_loss": 0.14171260595321655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7743267987025116, + "compression_loss": 0.0, + "distillation_loss": 0.0965137854218483, + "epoch": 2.92, + "learning_rate": 3.1503919484716495e-05, + "loss": 0.0902, + "step": 3071, + "task_loss": 0.03351738303899765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7743411814480021, + "compression_loss": 0.0, + "distillation_loss": 0.05635223537683487, + "epoch": 2.92, + "learning_rate": 3.149363054714992e-05, + "loss": 0.056, + "step": 3072, + "task_loss": 0.05248015746474266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7743555588207823, + "compression_loss": 0.0, + "distillation_loss": 0.12340125441551208, + "epoch": 2.92, + "learning_rate": 3.148334042998408e-05, + "loss": 0.1179, + "step": 3073, + "task_loss": 0.06886960566043854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.774369930821856, + "compression_loss": 0.0, + "distillation_loss": 0.06106632202863693, + "epoch": 2.92, + "learning_rate": 3.1473049135088225e-05, + "loss": 0.0575, + "step": 3074, + "task_loss": 0.025458548218011856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7743842974522268, + "compression_loss": 0.0, + "distillation_loss": 0.04669322818517685, + "epoch": 2.92, + "learning_rate": 3.146275666433183e-05, + "loss": 0.0486, + "step": 3075, + "task_loss": 0.06542003899812698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7743986587128984, + "compression_loss": 0.0, + "distillation_loss": 0.15752683579921722, + "epoch": 2.92, + "learning_rate": 3.145246301958455e-05, + "loss": 0.1552, + "step": 3076, + "task_loss": 0.13474304974079132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744130146048744, + "compression_loss": 0.0, + "distillation_loss": 0.040889352560043335, + "epoch": 2.92, + "learning_rate": 3.14421682027163e-05, + "loss": 0.0456, + "step": 3077, + "task_loss": 0.08812226355075836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744273651291587, + "compression_loss": 0.0, + "distillation_loss": 0.07757270336151123, + "epoch": 2.92, + "learning_rate": 3.143187221559715e-05, + "loss": 0.0816, + "step": 3078, + "task_loss": 0.11832978576421738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744417102867547, + "compression_loss": 0.0, + "distillation_loss": 0.04706088453531265, + "epoch": 2.92, + "learning_rate": 3.142157506009746e-05, + "loss": 0.0517, + "step": 3079, + "task_loss": 0.0932609811425209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744560500786662, + "compression_loss": 0.0, + "distillation_loss": 0.17205679416656494, + "epoch": 2.92, + "learning_rate": 3.141127673808772e-05, + "loss": 0.1766, + "step": 3080, + "task_loss": 0.21797949075698853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744703845058971, + "compression_loss": 0.0, + "distillation_loss": 0.11091163754463196, + "epoch": 2.93, + "learning_rate": 3.140097725143868e-05, + "loss": 0.1031, + "step": 3081, + "task_loss": 0.03264191001653671 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744847135694508, + "compression_loss": 0.0, + "distillation_loss": 0.17644111812114716, + "epoch": 2.93, + "learning_rate": 3.13906766020213e-05, + "loss": 0.1904, + "step": 3082, + "task_loss": 0.3157804012298584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7744990372703311, + "compression_loss": 0.0, + "distillation_loss": 0.037624701857566833, + "epoch": 2.93, + "learning_rate": 3.138037479170674e-05, + "loss": 0.0394, + "step": 3083, + "task_loss": 0.05507116764783859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7745133556095416, + "compression_loss": 0.0, + "distillation_loss": 0.14321953058242798, + "epoch": 2.93, + "learning_rate": 3.137007182236637e-05, + "loss": 0.1347, + "step": 3084, + "task_loss": 0.05840989202260971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7745276685880862, + "compression_loss": 0.0, + "distillation_loss": 0.03971727564930916, + "epoch": 2.93, + "learning_rate": 3.1359767695871767e-05, + "loss": 0.0431, + "step": 3085, + "task_loss": 0.07382907718420029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7745419762069683, + "compression_loss": 0.0, + "distillation_loss": 0.3982169032096863, + "epoch": 2.93, + "learning_rate": 3.134946241409474e-05, + "loss": 0.3973, + "step": 3086, + "task_loss": 0.3887830376625061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7745562784671917, + "compression_loss": 0.0, + "distillation_loss": 0.03360104188323021, + "epoch": 2.93, + "learning_rate": 3.133915597890729e-05, + "loss": 0.0468, + "step": 3087, + "task_loss": 0.1660730242729187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.77457057536976, + "compression_loss": 0.0, + "distillation_loss": 0.030526025220751762, + "epoch": 2.93, + "learning_rate": 3.132884839218162e-05, + "loss": 0.0331, + "step": 3088, + "task_loss": 0.056509580463171005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7745848669156772, + "compression_loss": 0.0, + "distillation_loss": 0.02792483940720558, + "epoch": 2.93, + "learning_rate": 3.131853965579016e-05, + "loss": 0.0285, + "step": 3089, + "task_loss": 0.03412621468305588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7745991531059465, + "compression_loss": 0.0, + "distillation_loss": 0.10251462459564209, + "epoch": 2.93, + "learning_rate": 3.130822977160554e-05, + "loss": 0.0957, + "step": 3090, + "task_loss": 0.03391581028699875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.774613433941572, + "compression_loss": 0.0, + "distillation_loss": 0.04014907404780388, + "epoch": 2.94, + "learning_rate": 3.129791874150062e-05, + "loss": 0.0426, + "step": 3091, + "task_loss": 0.0648120641708374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7746277094235572, + "compression_loss": 0.0, + "distillation_loss": 0.0542302206158638, + "epoch": 2.94, + "learning_rate": 3.1287606567348406e-05, + "loss": 0.0595, + "step": 3092, + "task_loss": 0.10736650973558426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7746419795529058, + "compression_loss": 0.0, + "distillation_loss": 0.02264413796365261, + "epoch": 2.94, + "learning_rate": 3.1277293251022185e-05, + "loss": 0.0265, + "step": 3093, + "task_loss": 0.0615130253136158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7746562443306214, + "compression_loss": 0.0, + "distillation_loss": 0.16324403882026672, + "epoch": 2.94, + "learning_rate": 3.126697879439541e-05, + "loss": 0.1576, + "step": 3094, + "task_loss": 0.10708633810281754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7746705037577079, + "compression_loss": 0.0, + "distillation_loss": 0.025752229616045952, + "epoch": 2.94, + "learning_rate": 3.1256663199341764e-05, + "loss": 0.0244, + "step": 3095, + "task_loss": 0.012307420372962952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7746847578351687, + "compression_loss": 0.0, + "distillation_loss": 0.02341257967054844, + "epoch": 2.94, + "learning_rate": 3.124634646773511e-05, + "loss": 0.0219, + "step": 3096, + "task_loss": 0.008523575961589813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7746990065640077, + "compression_loss": 0.0, + "distillation_loss": 0.11951486021280289, + "epoch": 2.94, + "learning_rate": 3.1236028601449534e-05, + "loss": 0.1255, + "step": 3097, + "task_loss": 0.17937280237674713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747132499452285, + "compression_loss": 0.0, + "distillation_loss": 0.1342199444770813, + "epoch": 2.94, + "learning_rate": 3.1225709602359335e-05, + "loss": 0.1285, + "step": 3098, + "task_loss": 0.07676542550325394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747274879798347, + "compression_loss": 0.0, + "distillation_loss": 0.04866989701986313, + "epoch": 2.94, + "learning_rate": 3.1215389472338995e-05, + "loss": 0.0541, + "step": 3099, + "task_loss": 0.10313687473535538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747417206688302, + "compression_loss": 0.0, + "distillation_loss": 0.09249399602413177, + "epoch": 2.94, + "learning_rate": 3.1205068213263234e-05, + "loss": 0.1012, + "step": 3100, + "task_loss": 0.17944881319999695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747559480132186, + "compression_loss": 0.0, + "distillation_loss": 0.05482405051589012, + "epoch": 2.94, + "learning_rate": 3.119474582700695e-05, + "loss": 0.0604, + "step": 3101, + "task_loss": 0.11096543818712234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747701700140034, + "compression_loss": 0.0, + "distillation_loss": 0.02817433886229992, + "epoch": 2.95, + "learning_rate": 3.118442231544524e-05, + "loss": 0.0331, + "step": 3102, + "task_loss": 0.07782773673534393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747843866721884, + "compression_loss": 0.0, + "distillation_loss": 0.03213953226804733, + "epoch": 2.95, + "learning_rate": 3.117409768045344e-05, + "loss": 0.0392, + "step": 3103, + "task_loss": 0.10243944823741913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7747985979887774, + "compression_loss": 0.0, + "distillation_loss": 0.2645414471626282, + "epoch": 2.95, + "learning_rate": 3.116377192390706e-05, + "loss": 0.2525, + "step": 3104, + "task_loss": 0.14380747079849243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7748128039647739, + "compression_loss": 0.0, + "distillation_loss": 0.03608560562133789, + "epoch": 2.95, + "learning_rate": 3.115344504768183e-05, + "loss": 0.044, + "step": 3105, + "task_loss": 0.115634024143219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7748270046011818, + "compression_loss": 0.0, + "distillation_loss": 0.0855831503868103, + "epoch": 2.95, + "learning_rate": 3.1143117053653665e-05, + "loss": 0.0873, + "step": 3106, + "task_loss": 0.10302843898534775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7748411998990046, + "compression_loss": 0.0, + "distillation_loss": 0.1709257960319519, + "epoch": 2.95, + "learning_rate": 3.113278794369869e-05, + "loss": 0.1625, + "step": 3107, + "task_loss": 0.08654191344976425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.774855389859246, + "compression_loss": 0.0, + "distillation_loss": 0.012697904370725155, + "epoch": 2.95, + "learning_rate": 3.112245771969327e-05, + "loss": 0.0119, + "step": 3108, + "task_loss": 0.004779975861310959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7748695744829096, + "compression_loss": 0.0, + "distillation_loss": 0.12192719429731369, + "epoch": 2.95, + "learning_rate": 3.1112126383513914e-05, + "loss": 0.1268, + "step": 3109, + "task_loss": 0.1710333675146103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7748837537709993, + "compression_loss": 0.0, + "distillation_loss": 0.09610234200954437, + "epoch": 2.95, + "learning_rate": 3.110179393703737e-05, + "loss": 0.1014, + "step": 3110, + "task_loss": 0.1487446278333664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7748979277245187, + "compression_loss": 0.0, + "distillation_loss": 0.12404096126556396, + "epoch": 2.95, + "learning_rate": 3.109146038214055e-05, + "loss": 0.1215, + "step": 3111, + "task_loss": 0.09883137792348862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749120963444714, + "compression_loss": 0.0, + "distillation_loss": 0.07528949528932571, + "epoch": 2.96, + "learning_rate": 3.108112572070063e-05, + "loss": 0.0717, + "step": 3112, + "task_loss": 0.03988656401634216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749262596318612, + "compression_loss": 0.0, + "distillation_loss": 0.06444555521011353, + "epoch": 2.96, + "learning_rate": 3.1070789954594934e-05, + "loss": 0.0607, + "step": 3113, + "task_loss": 0.027471771463751793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749404175876917, + "compression_loss": 0.0, + "distillation_loss": 0.06135058403015137, + "epoch": 2.96, + "learning_rate": 3.1060453085701e-05, + "loss": 0.0668, + "step": 3114, + "task_loss": 0.11571139842271805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749545702129665, + "compression_loss": 0.0, + "distillation_loss": 0.15376602113246918, + "epoch": 2.96, + "learning_rate": 3.105011511589658e-05, + "loss": 0.1476, + "step": 3115, + "task_loss": 0.09177757799625397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749687175086896, + "compression_loss": 0.0, + "distillation_loss": 0.04827346280217171, + "epoch": 2.96, + "learning_rate": 3.103977604705961e-05, + "loss": 0.059, + "step": 3116, + "task_loss": 0.15540531277656555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749828594758643, + "compression_loss": 0.0, + "distillation_loss": 0.08922263234853745, + "epoch": 2.96, + "learning_rate": 3.102943588106824e-05, + "loss": 0.0912, + "step": 3117, + "task_loss": 0.10851528495550156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7749969961154946, + "compression_loss": 0.0, + "distillation_loss": 0.03960554301738739, + "epoch": 2.96, + "learning_rate": 3.10190946198008e-05, + "loss": 0.0461, + "step": 3118, + "task_loss": 0.10426989942789078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750111274285838, + "compression_loss": 0.0, + "distillation_loss": 0.05592392012476921, + "epoch": 2.96, + "learning_rate": 3.100875226513583e-05, + "loss": 0.0597, + "step": 3119, + "task_loss": 0.09348025172948837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750252534161359, + "compression_loss": 0.0, + "distillation_loss": 0.05171472951769829, + "epoch": 2.96, + "learning_rate": 3.099840881895208e-05, + "loss": 0.0476, + "step": 3120, + "task_loss": 0.010983137413859367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750393740791546, + "compression_loss": 0.0, + "distillation_loss": 0.09998922049999237, + "epoch": 2.96, + "learning_rate": 3.098806428312847e-05, + "loss": 0.0984, + "step": 3121, + "task_loss": 0.08365054428577423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750534894186435, + "compression_loss": 0.0, + "distillation_loss": 0.020870694890618324, + "epoch": 2.96, + "learning_rate": 3.097771865954415e-05, + "loss": 0.0196, + "step": 3122, + "task_loss": 0.008448204025626183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750675994356061, + "compression_loss": 0.0, + "distillation_loss": 0.1635720431804657, + "epoch": 2.97, + "learning_rate": 3.096737195007845e-05, + "loss": 0.1693, + "step": 3123, + "task_loss": 0.22086140513420105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750817041310464, + "compression_loss": 0.0, + "distillation_loss": 0.23271842300891876, + "epoch": 2.97, + "learning_rate": 3.0957024156610884e-05, + "loss": 0.2299, + "step": 3124, + "task_loss": 0.20500387251377106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7750958035059679, + "compression_loss": 0.0, + "distillation_loss": 0.0583171546459198, + "epoch": 2.97, + "learning_rate": 3.09466752810212e-05, + "loss": 0.0556, + "step": 3125, + "task_loss": 0.03121706284582615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751098975613742, + "compression_loss": 0.0, + "distillation_loss": 0.06426501274108887, + "epoch": 2.97, + "learning_rate": 3.093632532518931e-05, + "loss": 0.0598, + "step": 3126, + "task_loss": 0.019734475761651993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751239862982691, + "compression_loss": 0.0, + "distillation_loss": 0.15301446616649628, + "epoch": 2.97, + "learning_rate": 3.092597429099534e-05, + "loss": 0.1451, + "step": 3127, + "task_loss": 0.07339885830879211 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751380697176563, + "compression_loss": 0.0, + "distillation_loss": 0.10157543420791626, + "epoch": 2.97, + "learning_rate": 3.0915622180319585e-05, + "loss": 0.1026, + "step": 3128, + "task_loss": 0.11221370846033096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751521478205394, + "compression_loss": 0.0, + "distillation_loss": 0.10004980117082596, + "epoch": 2.97, + "learning_rate": 3.090526899504259e-05, + "loss": 0.096, + "step": 3129, + "task_loss": 0.05929484963417053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751662206079223, + "compression_loss": 0.0, + "distillation_loss": 0.035803135484457016, + "epoch": 2.97, + "learning_rate": 3.0894914737045034e-05, + "loss": 0.0331, + "step": 3130, + "task_loss": 0.009108470752835274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751802880808084, + "compression_loss": 0.0, + "distillation_loss": 0.12775352597236633, + "epoch": 2.97, + "learning_rate": 3.088455940820782e-05, + "loss": 0.1286, + "step": 3131, + "task_loss": 0.1366499364376068 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7751943502402016, + "compression_loss": 0.0, + "distillation_loss": 0.19691312313079834, + "epoch": 2.97, + "learning_rate": 3.087420301041206e-05, + "loss": 0.1927, + "step": 3132, + "task_loss": 0.15475985407829285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752084070871054, + "compression_loss": 0.0, + "distillation_loss": 0.19966569542884827, + "epoch": 2.98, + "learning_rate": 3.086384554553902e-05, + "loss": 0.1948, + "step": 3133, + "task_loss": 0.1510712206363678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752224586225235, + "compression_loss": 0.0, + "distillation_loss": 0.13692443072795868, + "epoch": 2.98, + "learning_rate": 3.0853487015470206e-05, + "loss": 0.1358, + "step": 3134, + "task_loss": 0.12599924206733704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752365048474598, + "compression_loss": 0.0, + "distillation_loss": 0.23906008899211884, + "epoch": 2.98, + "learning_rate": 3.084312742208728e-05, + "loss": 0.2498, + "step": 3135, + "task_loss": 0.34651899337768555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752505457629176, + "compression_loss": 0.0, + "distillation_loss": 0.03585006296634674, + "epoch": 2.98, + "learning_rate": 3.083276676727212e-05, + "loss": 0.034, + "step": 3136, + "task_loss": 0.016858315095305443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752645813699011, + "compression_loss": 0.0, + "distillation_loss": 0.10707780718803406, + "epoch": 2.98, + "learning_rate": 3.082240505290678e-05, + "loss": 0.1068, + "step": 3137, + "task_loss": 0.10397090762853622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752786116694135, + "compression_loss": 0.0, + "distillation_loss": 0.09790733456611633, + "epoch": 2.98, + "learning_rate": 3.081204228087353e-05, + "loss": 0.091, + "step": 3138, + "task_loss": 0.029314137995243073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7752926366624586, + "compression_loss": 0.0, + "distillation_loss": 0.298001766204834, + "epoch": 2.98, + "learning_rate": 3.08016784530548e-05, + "loss": 0.2974, + "step": 3139, + "task_loss": 0.2921411693096161 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753066563500404, + "compression_loss": 0.0, + "distillation_loss": 0.13080675899982452, + "epoch": 2.98, + "learning_rate": 3.0791313571333244e-05, + "loss": 0.1232, + "step": 3140, + "task_loss": 0.05510927364230156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753206707331621, + "compression_loss": 0.0, + "distillation_loss": 0.10794495791196823, + "epoch": 2.98, + "learning_rate": 3.078094763759168e-05, + "loss": 0.1087, + "step": 3141, + "task_loss": 0.1155327633023262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753346798128278, + "compression_loss": 0.0, + "distillation_loss": 0.0587034747004509, + "epoch": 2.98, + "learning_rate": 3.0770580653713146e-05, + "loss": 0.0565, + "step": 3142, + "task_loss": 0.03710935637354851 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753486835900409, + "compression_loss": 0.0, + "distillation_loss": 0.09703940898180008, + "epoch": 2.98, + "learning_rate": 3.076021262158084e-05, + "loss": 0.1103, + "step": 3143, + "task_loss": 0.22997155785560608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753626820658053, + "compression_loss": 0.0, + "distillation_loss": 0.0734797865152359, + "epoch": 2.99, + "learning_rate": 3.074984354307817e-05, + "loss": 0.0786, + "step": 3144, + "task_loss": 0.1250114142894745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753766752411244, + "compression_loss": 0.0, + "distillation_loss": 0.10003338009119034, + "epoch": 2.99, + "learning_rate": 3.073947342008873e-05, + "loss": 0.1057, + "step": 3145, + "task_loss": 0.15664803981781006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7753906631170021, + "compression_loss": 0.0, + "distillation_loss": 0.11784384399652481, + "epoch": 2.99, + "learning_rate": 3.07291022544963e-05, + "loss": 0.124, + "step": 3146, + "task_loss": 0.17987778782844543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7754046456944421, + "compression_loss": 0.0, + "distillation_loss": 0.14313513040542603, + "epoch": 2.99, + "learning_rate": 3.0718730048184855e-05, + "loss": 0.1502, + "step": 3147, + "task_loss": 0.2141174077987671 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.775418622974448, + "compression_loss": 0.0, + "distillation_loss": 0.027550848200917244, + "epoch": 2.99, + "learning_rate": 3.0708356803038556e-05, + "loss": 0.0306, + "step": 3148, + "task_loss": 0.05766863748431206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7754325949580235, + "compression_loss": 0.0, + "distillation_loss": 0.11986173689365387, + "epoch": 2.99, + "learning_rate": 3.069798252094175e-05, + "loss": 0.1198, + "step": 3149, + "task_loss": 0.11939980834722519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7754465616461722, + "compression_loss": 0.0, + "distillation_loss": 0.05384514853358269, + "epoch": 2.99, + "learning_rate": 3.068760720377897e-05, + "loss": 0.0502, + "step": 3150, + "task_loss": 0.017584798857569695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7754605230398979, + "compression_loss": 0.0, + "distillation_loss": 0.12095589935779572, + "epoch": 2.99, + "learning_rate": 3.067723085343496e-05, + "loss": 0.1254, + "step": 3151, + "task_loss": 0.16499733924865723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7754744791402043, + "compression_loss": 0.0, + "distillation_loss": 0.23978760838508606, + "epoch": 2.99, + "learning_rate": 3.066685347179462e-05, + "loss": 0.2298, + "step": 3152, + "task_loss": 0.1398588865995407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.775488429948095, + "compression_loss": 0.0, + "distillation_loss": 0.016089707612991333, + "epoch": 2.99, + "learning_rate": 3.065647506074306e-05, + "loss": 0.0148, + "step": 3153, + "task_loss": 0.002878313884139061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755023754645736, + "compression_loss": 0.0, + "distillation_loss": 0.13207751512527466, + "epoch": 3.0, + "learning_rate": 3.064609562216555e-05, + "loss": 0.1289, + "step": 3154, + "task_loss": 0.10021229833364487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755163156906439, + "compression_loss": 0.0, + "distillation_loss": 0.022115526720881462, + "epoch": 3.0, + "learning_rate": 3.063571515794759e-05, + "loss": 0.0208, + "step": 3155, + "task_loss": 0.008827542886137962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755302506273097, + "compression_loss": 0.0, + "distillation_loss": 0.07137614488601685, + "epoch": 3.0, + "learning_rate": 3.062533366997483e-05, + "loss": 0.0964, + "step": 3156, + "task_loss": 0.3213070034980774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755441802755745, + "compression_loss": 0.0, + "distillation_loss": 0.01968713104724884, + "epoch": 3.0, + "learning_rate": 3.061495116013311e-05, + "loss": 0.0209, + "step": 3157, + "task_loss": 0.03145528584718704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.775558104636442, + "compression_loss": 0.0, + "distillation_loss": 0.02357085794210434, + "epoch": 3.0, + "learning_rate": 3.060456763030847e-05, + "loss": 0.0219, + "step": 3158, + "task_loss": 0.006720980629324913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755720237109159, + "compression_loss": 0.0, + "distillation_loss": 0.11487483233213425, + "epoch": 3.0, + "learning_rate": 3.059418308238713e-05, + "loss": 0.1098, + "step": 3159, + "task_loss": 0.06400777399539948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755859375, + "compression_loss": 0.0, + "distillation_loss": 0.3807898461818695, + "epoch": 3.0, + "learning_rate": 3.0583797518255505e-05, + "loss": 0.3608, + "step": 3160, + "task_loss": 0.18057574331760406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7755998460046979, + "compression_loss": 0.0, + "distillation_loss": 0.08554988354444504, + "epoch": 3.0, + "learning_rate": 3.057341093980015e-05, + "loss": 0.081, + "step": 3161, + "task_loss": 0.04051545634865761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756137492260131, + "compression_loss": 0.0, + "distillation_loss": 0.5044205784797668, + "epoch": 3.0, + "learning_rate": 3.056302334890786e-05, + "loss": 0.4752, + "step": 3162, + "task_loss": 0.2119530588388443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756276471649496, + "compression_loss": 0.0, + "distillation_loss": 0.7463757395744324, + "epoch": 3.0, + "learning_rate": 3.055263474746559e-05, + "loss": 0.7219, + "step": 3163, + "task_loss": 0.5015002489089966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756415398225108, + "compression_loss": 0.0, + "distillation_loss": 0.15632954239845276, + "epoch": 3.0, + "learning_rate": 3.054224513736048e-05, + "loss": 0.1498, + "step": 3164, + "task_loss": 0.09135162830352783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756554271997006, + "compression_loss": 0.0, + "distillation_loss": 0.4045957326889038, + "epoch": 3.01, + "learning_rate": 3.0531854520479844e-05, + "loss": 0.3806, + "step": 3165, + "task_loss": 0.16499730944633484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756693092975225, + "compression_loss": 0.0, + "distillation_loss": 0.43263140320777893, + "epoch": 3.01, + "learning_rate": 3.05214628987112e-05, + "loss": 0.4179, + "step": 3166, + "task_loss": 0.2850673496723175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756831861169803, + "compression_loss": 0.0, + "distillation_loss": 0.23246167600154877, + "epoch": 3.01, + "learning_rate": 3.0511070273942217e-05, + "loss": 0.2326, + "step": 3167, + "task_loss": 0.23360560834407806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7756970576590777, + "compression_loss": 0.0, + "distillation_loss": 0.1095157265663147, + "epoch": 3.01, + "learning_rate": 3.0500676648060776e-05, + "loss": 0.1015, + "step": 3168, + "task_loss": 0.029302000999450684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7757109239248183, + "compression_loss": 0.0, + "distillation_loss": 0.31011509895324707, + "epoch": 3.01, + "learning_rate": 3.049028202295494e-05, + "loss": 0.3013, + "step": 3169, + "task_loss": 0.22210848331451416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7757247849152058, + "compression_loss": 0.0, + "distillation_loss": 0.24750226736068726, + "epoch": 3.01, + "learning_rate": 3.0479886400512937e-05, + "loss": 0.2541, + "step": 3170, + "task_loss": 0.31305140256881714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.775738640631244, + "compression_loss": 0.0, + "distillation_loss": 0.3639996349811554, + "epoch": 3.01, + "learning_rate": 3.0469489782623163e-05, + "loss": 0.3661, + "step": 3171, + "task_loss": 0.3845079243183136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7757524910739364, + "compression_loss": 0.0, + "distillation_loss": 0.20695006847381592, + "epoch": 3.01, + "learning_rate": 3.045909217117424e-05, + "loss": 0.2035, + "step": 3172, + "task_loss": 0.1726771593093872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7757663362442868, + "compression_loss": 0.0, + "distillation_loss": 0.511163592338562, + "epoch": 3.01, + "learning_rate": 3.0448693568054924e-05, + "loss": 0.5024, + "step": 3173, + "task_loss": 0.42367231845855713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7757801761432989, + "compression_loss": 0.0, + "distillation_loss": 0.4330875873565674, + "epoch": 3.01, + "learning_rate": 3.0438293975154186e-05, + "loss": 0.4055, + "step": 3174, + "task_loss": 0.157136470079422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7757940107719762, + "compression_loss": 0.0, + "distillation_loss": 0.3330579698085785, + "epoch": 3.02, + "learning_rate": 3.042789339436116e-05, + "loss": 0.3187, + "step": 3175, + "task_loss": 0.18938302993774414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7758078401313225, + "compression_loss": 0.0, + "distillation_loss": 0.26333099603652954, + "epoch": 3.02, + "learning_rate": 3.041749182756515e-05, + "loss": 0.2615, + "step": 3176, + "task_loss": 0.24524812400341034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7758216642223416, + "compression_loss": 0.0, + "distillation_loss": 0.11996375769376755, + "epoch": 3.02, + "learning_rate": 3.0407089276655664e-05, + "loss": 0.1205, + "step": 3177, + "task_loss": 0.12564218044281006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.775835483046037, + "compression_loss": 0.0, + "distillation_loss": 0.18018177151679993, + "epoch": 3.02, + "learning_rate": 3.039668574352237e-05, + "loss": 0.1752, + "step": 3178, + "task_loss": 0.12995178997516632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7758492966034125, + "compression_loss": 0.0, + "distillation_loss": 0.23442718386650085, + "epoch": 3.02, + "learning_rate": 3.0386281230055113e-05, + "loss": 0.2305, + "step": 3179, + "task_loss": 0.19505998492240906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7758631048954718, + "compression_loss": 0.0, + "distillation_loss": 0.22563649713993073, + "epoch": 3.02, + "learning_rate": 3.0375875738143938e-05, + "loss": 0.2198, + "step": 3180, + "task_loss": 0.16684770584106445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7758769079232184, + "compression_loss": 0.0, + "distillation_loss": 0.05871054530143738, + "epoch": 3.02, + "learning_rate": 3.0365469269679042e-05, + "loss": 0.0537, + "step": 3181, + "task_loss": 0.008890055119991302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7758907056876562, + "compression_loss": 0.0, + "distillation_loss": 0.07384837418794632, + "epoch": 3.02, + "learning_rate": 3.0355061826550813e-05, + "loss": 0.078, + "step": 3182, + "task_loss": 0.1150825098156929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7759044981897888, + "compression_loss": 0.0, + "distillation_loss": 0.29694080352783203, + "epoch": 3.02, + "learning_rate": 3.0344653410649815e-05, + "loss": 0.2839, + "step": 3183, + "task_loss": 0.1665906310081482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7759182854306198, + "compression_loss": 0.0, + "distillation_loss": 0.06391322612762451, + "epoch": 3.02, + "learning_rate": 3.033424402386678e-05, + "loss": 0.0754, + "step": 3184, + "task_loss": 0.1787717342376709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.775932067411153, + "compression_loss": 0.0, + "distillation_loss": 0.09081655740737915, + "epoch": 3.02, + "learning_rate": 3.032383366809263e-05, + "loss": 0.0906, + "step": 3185, + "task_loss": 0.088164322078228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.775945844132392, + "compression_loss": 0.0, + "distillation_loss": 0.2625811696052551, + "epoch": 3.03, + "learning_rate": 3.031342234521845e-05, + "loss": 0.2886, + "step": 3186, + "task_loss": 0.5232532620429993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7759596155953404, + "compression_loss": 0.0, + "distillation_loss": 0.05291663110256195, + "epoch": 3.03, + "learning_rate": 3.030301005713552e-05, + "loss": 0.0481, + "step": 3187, + "task_loss": 0.004770837724208832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7759733818010022, + "compression_loss": 0.0, + "distillation_loss": 0.15582989156246185, + "epoch": 3.03, + "learning_rate": 3.0292596805735274e-05, + "loss": 0.1483, + "step": 3188, + "task_loss": 0.08084064722061157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7759871427503807, + "compression_loss": 0.0, + "distillation_loss": 0.20074017345905304, + "epoch": 3.03, + "learning_rate": 3.028218259290932e-05, + "loss": 0.1874, + "step": 3189, + "task_loss": 0.06705089658498764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.77600089844448, + "compression_loss": 0.0, + "distillation_loss": 0.06447502970695496, + "epoch": 3.03, + "learning_rate": 3.0271767420549463e-05, + "loss": 0.0681, + "step": 3190, + "task_loss": 0.1006789430975914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7760146488843033, + "compression_loss": 0.0, + "distillation_loss": 0.2723419964313507, + "epoch": 3.03, + "learning_rate": 3.0261351290547667e-05, + "loss": 0.2704, + "step": 3191, + "task_loss": 0.25289347767829895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7760283940708547, + "compression_loss": 0.0, + "distillation_loss": 0.2833019196987152, + "epoch": 3.03, + "learning_rate": 3.025093420479607e-05, + "loss": 0.2691, + "step": 3192, + "task_loss": 0.1415441334247589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7760421340051376, + "compression_loss": 0.0, + "distillation_loss": 0.12315277010202408, + "epoch": 3.03, + "learning_rate": 3.0240516165186976e-05, + "loss": 0.1165, + "step": 3193, + "task_loss": 0.056251607835292816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7760558686881558, + "compression_loss": 0.0, + "distillation_loss": 0.39456677436828613, + "epoch": 3.03, + "learning_rate": 3.0230097173612896e-05, + "loss": 0.3795, + "step": 3194, + "task_loss": 0.24376647174358368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776069598120913, + "compression_loss": 0.0, + "distillation_loss": 0.13045181334018707, + "epoch": 3.03, + "learning_rate": 3.021967723196647e-05, + "loss": 0.1243, + "step": 3195, + "task_loss": 0.06902102380990982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7760833223044128, + "compression_loss": 0.0, + "distillation_loss": 0.13044685125350952, + "epoch": 3.04, + "learning_rate": 3.020925634214054e-05, + "loss": 0.1433, + "step": 3196, + "task_loss": 0.25945261120796204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776097041239659, + "compression_loss": 0.0, + "distillation_loss": 0.07756803929805756, + "epoch": 3.04, + "learning_rate": 3.01988345060281e-05, + "loss": 0.0819, + "step": 3197, + "task_loss": 0.12053569406270981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7761107549276551, + "compression_loss": 0.0, + "distillation_loss": 0.285247802734375, + "epoch": 3.04, + "learning_rate": 3.018841172552234e-05, + "loss": 0.2711, + "step": 3198, + "task_loss": 0.14363518357276917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7761244633694051, + "compression_loss": 0.0, + "distillation_loss": 0.36870819330215454, + "epoch": 3.04, + "learning_rate": 3.01779880025166e-05, + "loss": 0.3675, + "step": 3199, + "task_loss": 0.3562384843826294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7761381665659124, + "compression_loss": 0.0, + "distillation_loss": 0.09676843881607056, + "epoch": 3.04, + "learning_rate": 3.0167563338904402e-05, + "loss": 0.0949, + "step": 3200, + "task_loss": 0.0785759687423706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7761518645181807, + "compression_loss": 0.0, + "distillation_loss": 0.0968627855181694, + "epoch": 3.04, + "learning_rate": 3.0157137736579445e-05, + "loss": 0.0951, + "step": 3201, + "task_loss": 0.07877330482006073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7761655572272138, + "compression_loss": 0.0, + "distillation_loss": 0.2662609815597534, + "epoch": 3.04, + "learning_rate": 3.014671119743556e-05, + "loss": 0.2659, + "step": 3202, + "task_loss": 0.26285213232040405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7761792446940153, + "compression_loss": 0.0, + "distillation_loss": 0.07620637863874435, + "epoch": 3.04, + "learning_rate": 3.013628372336682e-05, + "loss": 0.0915, + "step": 3203, + "task_loss": 0.22959944605827332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776192926919589, + "compression_loss": 0.0, + "distillation_loss": 0.13465449213981628, + "epoch": 3.04, + "learning_rate": 3.0125855316267394e-05, + "loss": 0.1472, + "step": 3204, + "task_loss": 0.2603115439414978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762066039049383, + "compression_loss": 0.0, + "distillation_loss": 0.14497964084148407, + "epoch": 3.04, + "learning_rate": 3.0115425978031663e-05, + "loss": 0.1501, + "step": 3205, + "task_loss": 0.1961698830127716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762202756510672, + "compression_loss": 0.0, + "distillation_loss": 0.24775618314743042, + "epoch": 3.04, + "learning_rate": 3.0104995710554174e-05, + "loss": 0.2393, + "step": 3206, + "task_loss": 0.16288542747497559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762339421589793, + "compression_loss": 0.0, + "distillation_loss": 0.13831278681755066, + "epoch": 3.05, + "learning_rate": 3.0094564515729623e-05, + "loss": 0.1352, + "step": 3207, + "task_loss": 0.10688167065382004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762476034296781, + "compression_loss": 0.0, + "distillation_loss": 0.04619987681508064, + "epoch": 3.05, + "learning_rate": 3.0084132395452896e-05, + "loss": 0.0428, + "step": 3208, + "task_loss": 0.011970948427915573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762612594641676, + "compression_loss": 0.0, + "distillation_loss": 0.048860594630241394, + "epoch": 3.05, + "learning_rate": 3.0073699351619033e-05, + "loss": 0.0447, + "step": 3209, + "task_loss": 0.0075436122715473175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762749102634512, + "compression_loss": 0.0, + "distillation_loss": 0.09679286926984787, + "epoch": 3.05, + "learning_rate": 3.0063265386123247e-05, + "loss": 0.0926, + "step": 3210, + "task_loss": 0.054950546473264694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7762885558285327, + "compression_loss": 0.0, + "distillation_loss": 0.47562968730926514, + "epoch": 3.05, + "learning_rate": 3.0052830500860912e-05, + "loss": 0.4544, + "step": 3211, + "task_loss": 0.26370471715927124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763021961604157, + "compression_loss": 0.0, + "distillation_loss": 0.10135069489479065, + "epoch": 3.05, + "learning_rate": 3.0042394697727587e-05, + "loss": 0.0972, + "step": 3212, + "task_loss": 0.059726666659116745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763158312601041, + "compression_loss": 0.0, + "distillation_loss": 0.17847372591495514, + "epoch": 3.05, + "learning_rate": 3.0031957978618986e-05, + "loss": 0.1807, + "step": 3213, + "task_loss": 0.20080722868442535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763294611286014, + "compression_loss": 0.0, + "distillation_loss": 0.17386582493782043, + "epoch": 3.05, + "learning_rate": 3.002152034543098e-05, + "loss": 0.1664, + "step": 3214, + "task_loss": 0.09940002113580704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763430857669111, + "compression_loss": 0.0, + "distillation_loss": 0.10909870266914368, + "epoch": 3.05, + "learning_rate": 3.0011081800059616e-05, + "loss": 0.1128, + "step": 3215, + "task_loss": 0.14585895836353302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763567051760373, + "compression_loss": 0.0, + "distillation_loss": 0.045543644577264786, + "epoch": 3.05, + "learning_rate": 3.0000642344401113e-05, + "loss": 0.0494, + "step": 3216, + "task_loss": 0.08457530289888382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763703193569834, + "compression_loss": 0.0, + "distillation_loss": 0.14089834690093994, + "epoch": 3.06, + "learning_rate": 2.999020198035184e-05, + "loss": 0.1429, + "step": 3217, + "task_loss": 0.16091729700565338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763839283107532, + "compression_loss": 0.0, + "distillation_loss": 0.15442857146263123, + "epoch": 3.06, + "learning_rate": 2.997976070980836e-05, + "loss": 0.1538, + "step": 3218, + "task_loss": 0.1483321487903595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7763975320383504, + "compression_loss": 0.0, + "distillation_loss": 0.1512613296508789, + "epoch": 3.06, + "learning_rate": 2.996931853466734e-05, + "loss": 0.1643, + "step": 3219, + "task_loss": 0.28167420625686646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7764111305407785, + "compression_loss": 0.0, + "distillation_loss": 0.129611536860466, + "epoch": 3.06, + "learning_rate": 2.9958875456825692e-05, + "loss": 0.1331, + "step": 3220, + "task_loss": 0.1641448736190796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7764247238190414, + "compression_loss": 0.0, + "distillation_loss": 0.06467771530151367, + "epoch": 3.06, + "learning_rate": 2.9948431478180434e-05, + "loss": 0.0646, + "step": 3221, + "task_loss": 0.06406955420970917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7764383118741426, + "compression_loss": 0.0, + "distillation_loss": 0.040332306176424026, + "epoch": 3.06, + "learning_rate": 2.9937986600628758e-05, + "loss": 0.0371, + "step": 3222, + "task_loss": 0.008311469107866287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776451894707086, + "compression_loss": 0.0, + "distillation_loss": 0.23755374550819397, + "epoch": 3.06, + "learning_rate": 2.992754082606804e-05, + "loss": 0.2371, + "step": 3223, + "task_loss": 0.23277811706066132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776465472318875, + "compression_loss": 0.0, + "distillation_loss": 0.2364281862974167, + "epoch": 3.06, + "learning_rate": 2.9917094156395796e-05, + "loss": 0.2324, + "step": 3224, + "task_loss": 0.19599126279354095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7764790447105135, + "compression_loss": 0.0, + "distillation_loss": 0.058149438351392746, + "epoch": 3.06, + "learning_rate": 2.990664659350973e-05, + "loss": 0.0539, + "step": 3225, + "task_loss": 0.015191374346613884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7764926118830051, + "compression_loss": 0.0, + "distillation_loss": 0.20538832247257233, + "epoch": 3.06, + "learning_rate": 2.9896198139307668e-05, + "loss": 0.1981, + "step": 3226, + "task_loss": 0.13257679343223572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7765061738373535, + "compression_loss": 0.0, + "distillation_loss": 0.033098138868808746, + "epoch": 3.06, + "learning_rate": 2.9885748795687642e-05, + "loss": 0.0396, + "step": 3227, + "task_loss": 0.09805357456207275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7765197305745625, + "compression_loss": 0.0, + "distillation_loss": 0.13397681713104248, + "epoch": 3.07, + "learning_rate": 2.9875298564547805e-05, + "loss": 0.1336, + "step": 3228, + "task_loss": 0.1301775723695755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7765332820956355, + "compression_loss": 0.0, + "distillation_loss": 0.1260242760181427, + "epoch": 3.07, + "learning_rate": 2.9864847447786503e-05, + "loss": 0.1186, + "step": 3229, + "task_loss": 0.05128313973546028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7765468284015764, + "compression_loss": 0.0, + "distillation_loss": 0.14219728112220764, + "epoch": 3.07, + "learning_rate": 2.9854395447302246e-05, + "loss": 0.1386, + "step": 3230, + "task_loss": 0.10616907477378845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7765603694933888, + "compression_loss": 0.0, + "distillation_loss": 0.06799346953630447, + "epoch": 3.07, + "learning_rate": 2.9843942564993672e-05, + "loss": 0.0646, + "step": 3231, + "task_loss": 0.0343768373131752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7765739053720765, + "compression_loss": 0.0, + "distillation_loss": 0.03196606785058975, + "epoch": 3.07, + "learning_rate": 2.98334888027596e-05, + "loss": 0.0436, + "step": 3232, + "task_loss": 0.14865921437740326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776587436038643, + "compression_loss": 0.0, + "distillation_loss": 0.12633199989795685, + "epoch": 3.07, + "learning_rate": 2.9823034162499007e-05, + "loss": 0.1216, + "step": 3233, + "task_loss": 0.07861751317977905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766009614940921, + "compression_loss": 0.0, + "distillation_loss": 0.11836487054824829, + "epoch": 3.07, + "learning_rate": 2.981257864611104e-05, + "loss": 0.1123, + "step": 3234, + "task_loss": 0.058093033730983734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766144817394275, + "compression_loss": 0.0, + "distillation_loss": 0.09729697555303574, + "epoch": 3.07, + "learning_rate": 2.980212225549498e-05, + "loss": 0.0916, + "step": 3235, + "task_loss": 0.040597811341285706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766279967756528, + "compression_loss": 0.0, + "distillation_loss": 0.043897949159145355, + "epoch": 3.07, + "learning_rate": 2.9791664992550273e-05, + "loss": 0.0408, + "step": 3236, + "task_loss": 0.012871745973825455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766415066037717, + "compression_loss": 0.0, + "distillation_loss": 0.27021345496177673, + "epoch": 3.07, + "learning_rate": 2.978120685917656e-05, + "loss": 0.2581, + "step": 3237, + "task_loss": 0.1487370729446411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766550112247879, + "compression_loss": 0.0, + "distillation_loss": 0.24136856198310852, + "epoch": 3.08, + "learning_rate": 2.9770747857273584e-05, + "loss": 0.2404, + "step": 3238, + "task_loss": 0.23204952478408813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766685106397051, + "compression_loss": 0.0, + "distillation_loss": 0.038442693650722504, + "epoch": 3.08, + "learning_rate": 2.9760287988741293e-05, + "loss": 0.0354, + "step": 3239, + "task_loss": 0.008406125009059906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776682004849527, + "compression_loss": 0.0, + "distillation_loss": 0.2253015786409378, + "epoch": 3.08, + "learning_rate": 2.9749827255479755e-05, + "loss": 0.2234, + "step": 3240, + "task_loss": 0.20639631152153015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7766954938552572, + "compression_loss": 0.0, + "distillation_loss": 0.19598889350891113, + "epoch": 3.08, + "learning_rate": 2.9739365659389223e-05, + "loss": 0.1854, + "step": 3241, + "task_loss": 0.08999612182378769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7767089776578995, + "compression_loss": 0.0, + "distillation_loss": 0.029667746275663376, + "epoch": 3.08, + "learning_rate": 2.972890320237009e-05, + "loss": 0.0271, + "step": 3242, + "task_loss": 0.0044384244829416275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7767224562584574, + "compression_loss": 0.0, + "distillation_loss": 0.03521028161048889, + "epoch": 3.08, + "learning_rate": 2.971843988632292e-05, + "loss": 0.0321, + "step": 3243, + "task_loss": 0.00444982573390007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7767359296579347, + "compression_loss": 0.0, + "distillation_loss": 0.06014445796608925, + "epoch": 3.08, + "learning_rate": 2.970797571314842e-05, + "loss": 0.0566, + "step": 3244, + "task_loss": 0.02440224587917328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7767493978573352, + "compression_loss": 0.0, + "distillation_loss": 0.27860748767852783, + "epoch": 3.08, + "learning_rate": 2.9697510684747454e-05, + "loss": 0.2662, + "step": 3245, + "task_loss": 0.15431803464889526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7767628608576623, + "compression_loss": 0.0, + "distillation_loss": 0.36406850814819336, + "epoch": 3.08, + "learning_rate": 2.9687044803021057e-05, + "loss": 0.3679, + "step": 3246, + "task_loss": 0.4021039605140686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.77677631865992, + "compression_loss": 0.0, + "distillation_loss": 0.4641655385494232, + "epoch": 3.08, + "learning_rate": 2.9676578069870392e-05, + "loss": 0.4604, + "step": 3247, + "task_loss": 0.4263751804828644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7767897712651117, + "compression_loss": 0.0, + "distillation_loss": 0.10289011895656586, + "epoch": 3.08, + "learning_rate": 2.9666110487196798e-05, + "loss": 0.1047, + "step": 3248, + "task_loss": 0.12141219526529312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7768032186742413, + "compression_loss": 0.0, + "distillation_loss": 0.09398964792490005, + "epoch": 3.09, + "learning_rate": 2.9655642056901762e-05, + "loss": 0.0971, + "step": 3249, + "task_loss": 0.1248759999871254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7768166608883122, + "compression_loss": 0.0, + "distillation_loss": 0.20722612738609314, + "epoch": 3.09, + "learning_rate": 2.9645172780886927e-05, + "loss": 0.2046, + "step": 3250, + "task_loss": 0.18093663454055786 + }, + { + "epoch": 3.09, + "eval_accuracy": 0.8795871559633027, + "eval_loss": 0.5020281076431274, + "eval_runtime": 17.7871, + "eval_samples_per_second": 49.024, + "eval_steps_per_second": 6.128, + "step": 3250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7768300979083285, + "compression_loss": 0.0, + "distillation_loss": 0.0628579705953598, + "epoch": 3.09, + "learning_rate": 2.9634702661054085e-05, + "loss": 0.0577, + "step": 3251, + "task_loss": 0.010829754173755646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7768435297352936, + "compression_loss": 0.0, + "distillation_loss": 0.04929596930742264, + "epoch": 3.09, + "learning_rate": 2.962423169930518e-05, + "loss": 0.0538, + "step": 3252, + "task_loss": 0.09398236870765686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7768569563702111, + "compression_loss": 0.0, + "distillation_loss": 0.055220600217580795, + "epoch": 3.09, + "learning_rate": 2.961375989754232e-05, + "loss": 0.0594, + "step": 3253, + "task_loss": 0.09694081544876099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776870377814085, + "compression_loss": 0.0, + "distillation_loss": 0.14030757546424866, + "epoch": 3.09, + "learning_rate": 2.9603287257667754e-05, + "loss": 0.1347, + "step": 3254, + "task_loss": 0.08383115381002426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7768837940679186, + "compression_loss": 0.0, + "distillation_loss": 0.05990925058722496, + "epoch": 3.09, + "learning_rate": 2.9592813781583885e-05, + "loss": 0.0712, + "step": 3255, + "task_loss": 0.17235514521598816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776897205132716, + "compression_loss": 0.0, + "distillation_loss": 0.14225684106349945, + "epoch": 3.09, + "learning_rate": 2.958233947119328e-05, + "loss": 0.144, + "step": 3256, + "task_loss": 0.16001349687576294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7769106110094804, + "compression_loss": 0.0, + "distillation_loss": 0.03743474930524826, + "epoch": 3.09, + "learning_rate": 2.9571864328398636e-05, + "loss": 0.0344, + "step": 3257, + "task_loss": 0.006805334240198135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776924011699216, + "compression_loss": 0.0, + "distillation_loss": 0.13843439519405365, + "epoch": 3.09, + "learning_rate": 2.956138835510282e-05, + "loss": 0.1333, + "step": 3258, + "task_loss": 0.08740270882844925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.776937407202926, + "compression_loss": 0.0, + "distillation_loss": 0.044483255594968796, + "epoch": 3.09, + "learning_rate": 2.9550911553208838e-05, + "loss": 0.0404, + "step": 3259, + "task_loss": 0.003829212859272957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7769507975216144, + "compression_loss": 0.0, + "distillation_loss": 0.3198070526123047, + "epoch": 3.1, + "learning_rate": 2.954043392461986e-05, + "loss": 0.3065, + "step": 3260, + "task_loss": 0.18672311305999756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7769641826562849, + "compression_loss": 0.0, + "distillation_loss": 0.1478300392627716, + "epoch": 3.1, + "learning_rate": 2.952995547123919e-05, + "loss": 0.1487, + "step": 3261, + "task_loss": 0.1560606062412262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7769775626079409, + "compression_loss": 0.0, + "distillation_loss": 0.05190206691622734, + "epoch": 3.1, + "learning_rate": 2.9519476194970286e-05, + "loss": 0.0471, + "step": 3262, + "task_loss": 0.0037672966718673706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7769909373775864, + "compression_loss": 0.0, + "distillation_loss": 0.07992243766784668, + "epoch": 3.1, + "learning_rate": 2.9508996097716777e-05, + "loss": 0.0892, + "step": 3263, + "task_loss": 0.17313553392887115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770043069662249, + "compression_loss": 0.0, + "distillation_loss": 0.1712629646062851, + "epoch": 3.1, + "learning_rate": 2.949851518138241e-05, + "loss": 0.1902, + "step": 3264, + "task_loss": 0.3601612150669098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770176713748601, + "compression_loss": 0.0, + "distillation_loss": 0.020557358860969543, + "epoch": 3.1, + "learning_rate": 2.948803344787109e-05, + "loss": 0.019, + "step": 3265, + "task_loss": 0.004615806043148041 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770310306044956, + "compression_loss": 0.0, + "distillation_loss": 0.058595433831214905, + "epoch": 3.1, + "learning_rate": 2.947755089908688e-05, + "loss": 0.0613, + "step": 3266, + "task_loss": 0.0856269896030426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770443846561353, + "compression_loss": 0.0, + "distillation_loss": 0.13503427803516388, + "epoch": 3.1, + "learning_rate": 2.946706753693398e-05, + "loss": 0.1376, + "step": 3267, + "task_loss": 0.16037359833717346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770577335307828, + "compression_loss": 0.0, + "distillation_loss": 0.06786477565765381, + "epoch": 3.1, + "learning_rate": 2.945658336331676e-05, + "loss": 0.0684, + "step": 3268, + "task_loss": 0.07298760861158371 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770710772294416, + "compression_loss": 0.0, + "distillation_loss": 0.06129030883312225, + "epoch": 3.1, + "learning_rate": 2.9446098380139703e-05, + "loss": 0.0586, + "step": 3269, + "task_loss": 0.03390103578567505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770844157531156, + "compression_loss": 0.0, + "distillation_loss": 0.07370973378419876, + "epoch": 3.11, + "learning_rate": 2.9435612589307458e-05, + "loss": 0.0731, + "step": 3270, + "task_loss": 0.06739000231027603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7770977491028085, + "compression_loss": 0.0, + "distillation_loss": 0.0707666426897049, + "epoch": 3.11, + "learning_rate": 2.942512599272483e-05, + "loss": 0.0739, + "step": 3271, + "task_loss": 0.1019490510225296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771110772795238, + "compression_loss": 0.0, + "distillation_loss": 0.08299912512302399, + "epoch": 3.11, + "learning_rate": 2.9414638592296752e-05, + "loss": 0.0816, + "step": 3272, + "task_loss": 0.06940528750419617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771244002842652, + "compression_loss": 0.0, + "distillation_loss": 0.13758215308189392, + "epoch": 3.11, + "learning_rate": 2.9404150389928316e-05, + "loss": 0.1317, + "step": 3273, + "task_loss": 0.07901225984096527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771377181180366, + "compression_loss": 0.0, + "distillation_loss": 0.14945004880428314, + "epoch": 3.11, + "learning_rate": 2.9393661387524745e-05, + "loss": 0.1439, + "step": 3274, + "task_loss": 0.09377557784318924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771510307818414, + "compression_loss": 0.0, + "distillation_loss": 0.13088908791542053, + "epoch": 3.11, + "learning_rate": 2.9383171586991424e-05, + "loss": 0.1261, + "step": 3275, + "task_loss": 0.0828273668885231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771643382766835, + "compression_loss": 0.0, + "distillation_loss": 0.1356673389673233, + "epoch": 3.11, + "learning_rate": 2.9372680990233875e-05, + "loss": 0.1301, + "step": 3276, + "task_loss": 0.07979649305343628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771776406035665, + "compression_loss": 0.0, + "distillation_loss": 0.15914839506149292, + "epoch": 3.11, + "learning_rate": 2.9362189599157776e-05, + "loss": 0.1522, + "step": 3277, + "task_loss": 0.08918256312608719 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7771909377634941, + "compression_loss": 0.0, + "distillation_loss": 0.031945958733558655, + "epoch": 3.11, + "learning_rate": 2.9351697415668917e-05, + "loss": 0.0375, + "step": 3278, + "task_loss": 0.08777586370706558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772042297574699, + "compression_loss": 0.0, + "distillation_loss": 0.07947254180908203, + "epoch": 3.11, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.0822, + "step": 3279, + "task_loss": 0.106949083507061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772175165864978, + "compression_loss": 0.0, + "distillation_loss": 0.11136071383953094, + "epoch": 3.11, + "learning_rate": 2.9330710679076916e-05, + "loss": 0.1059, + "step": 3280, + "task_loss": 0.057253483682870865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772307982515813, + "compression_loss": 0.0, + "distillation_loss": 0.04170932248234749, + "epoch": 3.12, + "learning_rate": 2.9320216129786116e-05, + "loss": 0.0457, + "step": 3281, + "task_loss": 0.08145752549171448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777244074753724, + "compression_loss": 0.0, + "distillation_loss": 0.014708688482642174, + "epoch": 3.12, + "learning_rate": 2.9309720795707257e-05, + "loss": 0.0136, + "step": 3282, + "task_loss": 0.0032885856926441193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772573460939297, + "compression_loss": 0.0, + "distillation_loss": 0.08308391273021698, + "epoch": 3.12, + "learning_rate": 2.9299224678746855e-05, + "loss": 0.0762, + "step": 3283, + "task_loss": 0.014657005667686462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772706122732022, + "compression_loss": 0.0, + "distillation_loss": 0.1468481719493866, + "epoch": 3.12, + "learning_rate": 2.928872778081158e-05, + "loss": 0.1414, + "step": 3284, + "task_loss": 0.09249945729970932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772838732925449, + "compression_loss": 0.0, + "distillation_loss": 0.029921449720859528, + "epoch": 3.12, + "learning_rate": 2.9278230103808257e-05, + "loss": 0.0345, + "step": 3285, + "task_loss": 0.07549962401390076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7772971291529618, + "compression_loss": 0.0, + "distillation_loss": 0.0418395921587944, + "epoch": 3.12, + "learning_rate": 2.9267731649643827e-05, + "loss": 0.048, + "step": 3286, + "task_loss": 0.10373726487159729 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7773103798554563, + "compression_loss": 0.0, + "distillation_loss": 0.05867641791701317, + "epoch": 3.12, + "learning_rate": 2.9257232420225394e-05, + "loss": 0.0721, + "step": 3287, + "task_loss": 0.19298899173736572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7773236254010323, + "compression_loss": 0.0, + "distillation_loss": 0.05694715678691864, + "epoch": 3.12, + "learning_rate": 2.9246732417460178e-05, + "loss": 0.0553, + "step": 3288, + "task_loss": 0.0401817262172699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7773368657906933, + "compression_loss": 0.0, + "distillation_loss": 0.056060872972011566, + "epoch": 3.12, + "learning_rate": 2.9236231643255578e-05, + "loss": 0.0606, + "step": 3289, + "task_loss": 0.10119974613189697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7773501010254431, + "compression_loss": 0.0, + "distillation_loss": 0.08849294483661652, + "epoch": 3.12, + "learning_rate": 2.922573009951909e-05, + "loss": 0.0865, + "step": 3290, + "task_loss": 0.06868458539247513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7773633311062854, + "compression_loss": 0.0, + "distillation_loss": 0.0786486566066742, + "epoch": 3.13, + "learning_rate": 2.9215227788158382e-05, + "loss": 0.0987, + "step": 3291, + "task_loss": 0.27886343002319336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7773765560342237, + "compression_loss": 0.0, + "distillation_loss": 0.08726784586906433, + "epoch": 3.13, + "learning_rate": 2.920472471108125e-05, + "loss": 0.0813, + "step": 3292, + "task_loss": 0.0275074802339077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777389775810262, + "compression_loss": 0.0, + "distillation_loss": 0.10106363892555237, + "epoch": 3.13, + "learning_rate": 2.919422087019561e-05, + "loss": 0.0965, + "step": 3293, + "task_loss": 0.055741891264915466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774029904354037, + "compression_loss": 0.0, + "distillation_loss": 0.14217272400856018, + "epoch": 3.13, + "learning_rate": 2.9183716267409562e-05, + "loss": 0.1446, + "step": 3294, + "task_loss": 0.16670069098472595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774161999106526, + "compression_loss": 0.0, + "distillation_loss": 0.10469906777143478, + "epoch": 3.13, + "learning_rate": 2.9173210904631297e-05, + "loss": 0.1019, + "step": 3295, + "task_loss": 0.07687127590179443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774294042370123, + "compression_loss": 0.0, + "distillation_loss": 0.17787286639213562, + "epoch": 3.13, + "learning_rate": 2.916270478376918e-05, + "loss": 0.1797, + "step": 3296, + "task_loss": 0.19574972987174988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774426034154867, + "compression_loss": 0.0, + "distillation_loss": 0.22817805409431458, + "epoch": 3.13, + "learning_rate": 2.9152197906731687e-05, + "loss": 0.2223, + "step": 3297, + "task_loss": 0.16891571879386902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774557974470792, + "compression_loss": 0.0, + "distillation_loss": 0.2010933756828308, + "epoch": 3.13, + "learning_rate": 2.9141690275427445e-05, + "loss": 0.2089, + "step": 3298, + "task_loss": 0.27871859073638916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774689863327936, + "compression_loss": 0.0, + "distillation_loss": 0.07105930149555206, + "epoch": 3.13, + "learning_rate": 2.9131181891765226e-05, + "loss": 0.067, + "step": 3299, + "task_loss": 0.030665744096040726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7774821700736336, + "compression_loss": 0.0, + "distillation_loss": 0.03237374126911163, + "epoch": 3.13, + "learning_rate": 2.9120672757653916e-05, + "loss": 0.0299, + "step": 3300, + "task_loss": 0.00755789689719677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777495348670603, + "compression_loss": 0.0, + "distillation_loss": 0.04193927347660065, + "epoch": 3.13, + "learning_rate": 2.9110162875002552e-05, + "loss": 0.0383, + "step": 3301, + "task_loss": 0.005132727324962616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7775085221247052, + "compression_loss": 0.0, + "distillation_loss": 0.02390095964074135, + "epoch": 3.14, + "learning_rate": 2.909965224572031e-05, + "loss": 0.0227, + "step": 3302, + "task_loss": 0.011394314467906952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7775216904369441, + "compression_loss": 0.0, + "distillation_loss": 0.11238003522157669, + "epoch": 3.14, + "learning_rate": 2.9089140871716492e-05, + "loss": 0.1184, + "step": 3303, + "task_loss": 0.1725841611623764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7775348536083233, + "compression_loss": 0.0, + "distillation_loss": 0.09369160979986191, + "epoch": 3.14, + "learning_rate": 2.9078628754900543e-05, + "loss": 0.0978, + "step": 3304, + "task_loss": 0.1350669115781784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7775480116398465, + "compression_loss": 0.0, + "distillation_loss": 0.04276084154844284, + "epoch": 3.14, + "learning_rate": 2.9068115897182036e-05, + "loss": 0.0476, + "step": 3305, + "task_loss": 0.09119967371225357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7775611645325174, + "compression_loss": 0.0, + "distillation_loss": 0.14125396311283112, + "epoch": 3.14, + "learning_rate": 2.905760230047068e-05, + "loss": 0.1457, + "step": 3306, + "task_loss": 0.18620248138904572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7775743122873398, + "compression_loss": 0.0, + "distillation_loss": 0.05440014600753784, + "epoch": 3.14, + "learning_rate": 2.9047087966676327e-05, + "loss": 0.0512, + "step": 3307, + "task_loss": 0.021976150572299957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777587454905317, + "compression_loss": 0.0, + "distillation_loss": 0.10159926116466522, + "epoch": 3.14, + "learning_rate": 2.903657289770896e-05, + "loss": 0.0933, + "step": 3308, + "task_loss": 0.018225595355033875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777600592387453, + "compression_loss": 0.0, + "distillation_loss": 0.05529090762138367, + "epoch": 3.14, + "learning_rate": 2.902605709547868e-05, + "loss": 0.0516, + "step": 3309, + "task_loss": 0.01865418255329132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7776137247347515, + "compression_loss": 0.0, + "distillation_loss": 0.15098199248313904, + "epoch": 3.14, + "learning_rate": 2.9015540561895738e-05, + "loss": 0.1434, + "step": 3310, + "task_loss": 0.07475800067186356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777626851948216, + "compression_loss": 0.0, + "distillation_loss": 0.0264451764523983, + "epoch": 3.14, + "learning_rate": 2.9005023298870514e-05, + "loss": 0.0323, + "step": 3311, + "task_loss": 0.08548250794410706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7776399740288504, + "compression_loss": 0.0, + "distillation_loss": 0.24020592868328094, + "epoch": 3.15, + "learning_rate": 2.8994505308313523e-05, + "loss": 0.2399, + "step": 3312, + "task_loss": 0.2373048961162567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7776530909776581, + "compression_loss": 0.0, + "distillation_loss": 0.16168448328971863, + "epoch": 3.15, + "learning_rate": 2.8983986592135404e-05, + "loss": 0.1736, + "step": 3313, + "task_loss": 0.2803611159324646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7776662027956431, + "compression_loss": 0.0, + "distillation_loss": 0.2303021252155304, + "epoch": 3.15, + "learning_rate": 2.897346715224693e-05, + "loss": 0.23, + "step": 3314, + "task_loss": 0.22724005579948425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7776793094838088, + "compression_loss": 0.0, + "distillation_loss": 0.12179985642433167, + "epoch": 3.15, + "learning_rate": 2.8962946990559013e-05, + "loss": 0.1226, + "step": 3315, + "task_loss": 0.1297818422317505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7776924110431591, + "compression_loss": 0.0, + "distillation_loss": 0.09324241429567337, + "epoch": 3.15, + "learning_rate": 2.8952426108982693e-05, + "loss": 0.0956, + "step": 3316, + "task_loss": 0.11695347726345062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777055074746976, + "compression_loss": 0.0, + "distillation_loss": 0.06409186124801636, + "epoch": 3.15, + "learning_rate": 2.8941904509429134e-05, + "loss": 0.0587, + "step": 3317, + "task_loss": 0.010216565802693367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777185987794278, + "compression_loss": 0.0, + "distillation_loss": 0.1214500218629837, + "epoch": 3.15, + "learning_rate": 2.8931382193809635e-05, + "loss": 0.1167, + "step": 3318, + "task_loss": 0.0738031342625618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777316849583537, + "compression_loss": 0.0, + "distillation_loss": 0.026341602206230164, + "epoch": 3.15, + "learning_rate": 2.8920859164035625e-05, + "loss": 0.024, + "step": 3319, + "task_loss": 0.00342458114027977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777447660124788, + "compression_loss": 0.0, + "distillation_loss": 0.09703463315963745, + "epoch": 3.15, + "learning_rate": 2.8910335422018664e-05, + "loss": 0.0881, + "step": 3320, + "task_loss": 0.00742473267018795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777578419428068, + "compression_loss": 0.0, + "distillation_loss": 0.16258884966373444, + "epoch": 3.15, + "learning_rate": 2.8899810969670448e-05, + "loss": 0.1614, + "step": 3321, + "task_loss": 0.15071289241313934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777709127503414, + "compression_loss": 0.0, + "distillation_loss": 0.04823547974228859, + "epoch": 3.15, + "learning_rate": 2.8889285808902784e-05, + "loss": 0.0598, + "step": 3322, + "task_loss": 0.16371826827526093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777839784360863, + "compression_loss": 0.0, + "distillation_loss": 0.023103870451450348, + "epoch": 3.16, + "learning_rate": 2.887875994162762e-05, + "loss": 0.0309, + "step": 3323, + "task_loss": 0.10059693455696106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7777970390010451, + "compression_loss": 0.0, + "distillation_loss": 0.1937963366508484, + "epoch": 3.16, + "learning_rate": 2.886823336975703e-05, + "loss": 0.1993, + "step": 3324, + "task_loss": 0.248738631606102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778100944462216, + "compression_loss": 0.0, + "distillation_loss": 0.015010814182460308, + "epoch": 3.16, + "learning_rate": 2.885770609520323e-05, + "loss": 0.014, + "step": 3325, + "task_loss": 0.004603438079357147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778231447726194, + "compression_loss": 0.0, + "distillation_loss": 0.05482349917292595, + "epoch": 3.16, + "learning_rate": 2.8847178119878527e-05, + "loss": 0.0568, + "step": 3326, + "task_loss": 0.07430904358625412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778361899812423, + "compression_loss": 0.0, + "distillation_loss": 0.12944038212299347, + "epoch": 3.16, + "learning_rate": 2.883664944569539e-05, + "loss": 0.1358, + "step": 3327, + "task_loss": 0.19279280304908752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778492300730937, + "compression_loss": 0.0, + "distillation_loss": 0.024749279022216797, + "epoch": 3.16, + "learning_rate": 2.8826120074566414e-05, + "loss": 0.0414, + "step": 3328, + "task_loss": 0.19170710444450378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778622650491777, + "compression_loss": 0.0, + "distillation_loss": 0.04718421772122383, + "epoch": 3.16, + "learning_rate": 2.8815590008404293e-05, + "loss": 0.0442, + "step": 3329, + "task_loss": 0.017278321087360382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778752949104976, + "compression_loss": 0.0, + "distillation_loss": 0.06321703642606735, + "epoch": 3.16, + "learning_rate": 2.8805059249121874e-05, + "loss": 0.0583, + "step": 3330, + "task_loss": 0.013739963993430138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7778883196580573, + "compression_loss": 0.0, + "distillation_loss": 0.17093795537948608, + "epoch": 3.16, + "learning_rate": 2.8794527798632117e-05, + "loss": 0.1645, + "step": 3331, + "task_loss": 0.10624982416629791 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7779013392928603, + "compression_loss": 0.0, + "distillation_loss": 0.09663266688585281, + "epoch": 3.16, + "learning_rate": 2.8783995658848105e-05, + "loss": 0.1007, + "step": 3332, + "task_loss": 0.13778991997241974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7779143538159106, + "compression_loss": 0.0, + "distillation_loss": 0.12523743510246277, + "epoch": 3.17, + "learning_rate": 2.877346283168306e-05, + "loss": 0.1164, + "step": 3333, + "task_loss": 0.03714917227625847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7779273632282115, + "compression_loss": 0.0, + "distillation_loss": 0.1300932914018631, + "epoch": 3.17, + "learning_rate": 2.876292931905032e-05, + "loss": 0.1346, + "step": 3334, + "task_loss": 0.17540670931339264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7779403675307669, + "compression_loss": 0.0, + "distillation_loss": 0.16550663113594055, + "epoch": 3.17, + "learning_rate": 2.875239512286335e-05, + "loss": 0.17, + "step": 3335, + "task_loss": 0.21077799797058105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7779533667245805, + "compression_loss": 0.0, + "distillation_loss": 0.10607212781906128, + "epoch": 3.17, + "learning_rate": 2.8741860245035722e-05, + "loss": 0.0985, + "step": 3336, + "task_loss": 0.030357446521520615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777966360810656, + "compression_loss": 0.0, + "distillation_loss": 0.0917745977640152, + "epoch": 3.17, + "learning_rate": 2.8731324687481176e-05, + "loss": 0.0947, + "step": 3337, + "task_loss": 0.12098343670368195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7779793497899968, + "compression_loss": 0.0, + "distillation_loss": 0.12411828339099884, + "epoch": 3.17, + "learning_rate": 2.8720788452113517e-05, + "loss": 0.115, + "step": 3338, + "task_loss": 0.03254596143960953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.777992333663607, + "compression_loss": 0.0, + "distillation_loss": 0.12530258297920227, + "epoch": 3.17, + "learning_rate": 2.8710251540846723e-05, + "loss": 0.1194, + "step": 3339, + "task_loss": 0.06607921421527863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780053124324899, + "compression_loss": 0.0, + "distillation_loss": 0.10954394936561584, + "epoch": 3.17, + "learning_rate": 2.8699713955594864e-05, + "loss": 0.1203, + "step": 3340, + "task_loss": 0.21743568778038025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780182860976496, + "compression_loss": 0.0, + "distillation_loss": 0.047306716442108154, + "epoch": 3.17, + "learning_rate": 2.8689175698272147e-05, + "loss": 0.045, + "step": 3341, + "task_loss": 0.02468855120241642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780312546600893, + "compression_loss": 0.0, + "distillation_loss": 0.09475719928741455, + "epoch": 3.17, + "learning_rate": 2.8678636770792906e-05, + "loss": 0.0905, + "step": 3342, + "task_loss": 0.052515700459480286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778044218120813, + "compression_loss": 0.0, + "distillation_loss": 0.1709882766008377, + "epoch": 3.17, + "learning_rate": 2.8668097175071572e-05, + "loss": 0.1834, + "step": 3343, + "task_loss": 0.29515278339385986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780571764808243, + "compression_loss": 0.0, + "distillation_loss": 0.04090896621346474, + "epoch": 3.18, + "learning_rate": 2.865755691302272e-05, + "loss": 0.0383, + "step": 3344, + "task_loss": 0.015103261917829514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780701297411269, + "compression_loss": 0.0, + "distillation_loss": 0.07005221396684647, + "epoch": 3.18, + "learning_rate": 2.864701598656104e-05, + "loss": 0.0843, + "step": 3345, + "task_loss": 0.21217550337314606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780830779027245, + "compression_loss": 0.0, + "distillation_loss": 0.030060699209570885, + "epoch": 3.18, + "learning_rate": 2.8636474397601343e-05, + "loss": 0.0275, + "step": 3346, + "task_loss": 0.00450095534324646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7780960209666207, + "compression_loss": 0.0, + "distillation_loss": 0.2950228452682495, + "epoch": 3.18, + "learning_rate": 2.862593214805856e-05, + "loss": 0.2962, + "step": 3347, + "task_loss": 0.30634093284606934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781089589338193, + "compression_loss": 0.0, + "distillation_loss": 0.0678018108010292, + "epoch": 3.18, + "learning_rate": 2.8615389239847734e-05, + "loss": 0.0648, + "step": 3348, + "task_loss": 0.03745712339878082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781218918053238, + "compression_loss": 0.0, + "distillation_loss": 0.1801690310239792, + "epoch": 3.18, + "learning_rate": 2.8604845674884045e-05, + "loss": 0.1866, + "step": 3349, + "task_loss": 0.24447283148765564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778134819582138, + "compression_loss": 0.0, + "distillation_loss": 0.05510277673602104, + "epoch": 3.18, + "learning_rate": 2.8594301455082777e-05, + "loss": 0.0609, + "step": 3350, + "task_loss": 0.11345023661851883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781477422652656, + "compression_loss": 0.0, + "distillation_loss": 0.1684875637292862, + "epoch": 3.18, + "learning_rate": 2.8583756582359338e-05, + "loss": 0.1605, + "step": 3351, + "task_loss": 0.08910365402698517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781606598557104, + "compression_loss": 0.0, + "distillation_loss": 0.13622252643108368, + "epoch": 3.18, + "learning_rate": 2.8573211058629262e-05, + "loss": 0.1293, + "step": 3352, + "task_loss": 0.06717808544635773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781735723544758, + "compression_loss": 0.0, + "distillation_loss": 0.03606577590107918, + "epoch": 3.18, + "learning_rate": 2.8562664885808176e-05, + "loss": 0.0345, + "step": 3353, + "task_loss": 0.020131606608629227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781864797625656, + "compression_loss": 0.0, + "distillation_loss": 0.07601951062679291, + "epoch": 3.19, + "learning_rate": 2.8552118065811868e-05, + "loss": 0.086, + "step": 3354, + "task_loss": 0.17620819807052612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7781993820809836, + "compression_loss": 0.0, + "distillation_loss": 0.032324016094207764, + "epoch": 3.19, + "learning_rate": 2.85415706005562e-05, + "loss": 0.0297, + "step": 3355, + "task_loss": 0.0057275425642728806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7782122793107333, + "compression_loss": 0.0, + "distillation_loss": 0.08825677633285522, + "epoch": 3.19, + "learning_rate": 2.8531022491957178e-05, + "loss": 0.0971, + "step": 3356, + "task_loss": 0.17651137709617615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7782251714528186, + "compression_loss": 0.0, + "distillation_loss": 0.05497463047504425, + "epoch": 3.19, + "learning_rate": 2.852047374193092e-05, + "loss": 0.0514, + "step": 3357, + "task_loss": 0.019507795572280884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778238058508243, + "compression_loss": 0.0, + "distillation_loss": 0.13246670365333557, + "epoch": 3.19, + "learning_rate": 2.850992435239364e-05, + "loss": 0.1283, + "step": 3358, + "task_loss": 0.0911954715847969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7782509404780101, + "compression_loss": 0.0, + "distillation_loss": 0.02414816804230213, + "epoch": 3.19, + "learning_rate": 2.8499374325261708e-05, + "loss": 0.03, + "step": 3359, + "task_loss": 0.08249876648187637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7782638173631239, + "compression_loss": 0.0, + "distillation_loss": 0.041819360107183456, + "epoch": 3.19, + "learning_rate": 2.848882366245157e-05, + "loss": 0.0381, + "step": 3360, + "task_loss": 0.004362896084785461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7782766891645878, + "compression_loss": 0.0, + "distillation_loss": 0.08119907230138779, + "epoch": 3.19, + "learning_rate": 2.847827236587982e-05, + "loss": 0.092, + "step": 3361, + "task_loss": 0.18970143795013428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7782895558834056, + "compression_loss": 0.0, + "distillation_loss": 0.05240718275308609, + "epoch": 3.19, + "learning_rate": 2.846772043746313e-05, + "loss": 0.0514, + "step": 3362, + "task_loss": 0.04256965219974518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778302417520581, + "compression_loss": 0.0, + "distillation_loss": 0.1657370626926422, + "epoch": 3.19, + "learning_rate": 2.845716787911833e-05, + "loss": 0.1659, + "step": 3363, + "task_loss": 0.16752569377422333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783152740771176, + "compression_loss": 0.0, + "distillation_loss": 0.06295058131217957, + "epoch": 3.19, + "learning_rate": 2.8446614692762336e-05, + "loss": 0.0575, + "step": 3364, + "task_loss": 0.008071176707744598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783281255540192, + "compression_loss": 0.0, + "distillation_loss": 0.147892564535141, + "epoch": 3.2, + "learning_rate": 2.843606088031218e-05, + "loss": 0.1436, + "step": 3365, + "task_loss": 0.10463625937700272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783409719522894, + "compression_loss": 0.0, + "distillation_loss": 0.02701820805668831, + "epoch": 3.2, + "learning_rate": 2.842550644368502e-05, + "loss": 0.0337, + "step": 3366, + "task_loss": 0.09333845973014832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783538132729317, + "compression_loss": 0.0, + "distillation_loss": 0.024246441200375557, + "epoch": 3.2, + "learning_rate": 2.841495138479811e-05, + "loss": 0.0222, + "step": 3367, + "task_loss": 0.003893321380019188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783666495169501, + "compression_loss": 0.0, + "distillation_loss": 0.16022557020187378, + "epoch": 3.2, + "learning_rate": 2.8404395705568848e-05, + "loss": 0.1523, + "step": 3368, + "task_loss": 0.08084793388843536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783794806853482, + "compression_loss": 0.0, + "distillation_loss": 0.03452833741903305, + "epoch": 3.2, + "learning_rate": 2.8393839407914702e-05, + "loss": 0.0374, + "step": 3369, + "task_loss": 0.06310579180717468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7783923067791296, + "compression_loss": 0.0, + "distillation_loss": 0.11381056159734726, + "epoch": 3.2, + "learning_rate": 2.8383282493753283e-05, + "loss": 0.1112, + "step": 3370, + "task_loss": 0.08721385896205902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778405127799298, + "compression_loss": 0.0, + "distillation_loss": 0.11384284496307373, + "epoch": 3.2, + "learning_rate": 2.83727249650023e-05, + "loss": 0.1135, + "step": 3371, + "task_loss": 0.11086120456457138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784179437468572, + "compression_loss": 0.0, + "distillation_loss": 0.04285869747400284, + "epoch": 3.2, + "learning_rate": 2.836216682357959e-05, + "loss": 0.063, + "step": 3372, + "task_loss": 0.24387109279632568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784307546228107, + "compression_loss": 0.0, + "distillation_loss": 0.10957767069339752, + "epoch": 3.2, + "learning_rate": 2.8351608071403085e-05, + "loss": 0.1169, + "step": 3373, + "task_loss": 0.18312203884124756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784435604281623, + "compression_loss": 0.0, + "distillation_loss": 0.06287790834903717, + "epoch": 3.2, + "learning_rate": 2.8341048710390832e-05, + "loss": 0.0677, + "step": 3374, + "task_loss": 0.11077691614627838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784563611639156, + "compression_loss": 0.0, + "distillation_loss": 0.035941578447818756, + "epoch": 3.21, + "learning_rate": 2.8330488742460987e-05, + "loss": 0.033, + "step": 3375, + "task_loss": 0.006380628794431686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784691568310744, + "compression_loss": 0.0, + "distillation_loss": 0.14878231287002563, + "epoch": 3.21, + "learning_rate": 2.8319928169531825e-05, + "loss": 0.1419, + "step": 3376, + "task_loss": 0.08023767173290253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784819474306423, + "compression_loss": 0.0, + "distillation_loss": 0.15483559668064117, + "epoch": 3.21, + "learning_rate": 2.830936699352172e-05, + "loss": 0.1589, + "step": 3377, + "task_loss": 0.1951579451560974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7784947329636229, + "compression_loss": 0.0, + "distillation_loss": 0.22917942702770233, + "epoch": 3.21, + "learning_rate": 2.8298805216349167e-05, + "loss": 0.215, + "step": 3378, + "task_loss": 0.08718053251504898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785075134310201, + "compression_loss": 0.0, + "distillation_loss": 0.266732782125473, + "epoch": 3.21, + "learning_rate": 2.8288242839932744e-05, + "loss": 0.2629, + "step": 3379, + "task_loss": 0.2285778522491455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785202888338374, + "compression_loss": 0.0, + "distillation_loss": 0.09135205298662186, + "epoch": 3.21, + "learning_rate": 2.8277679866191194e-05, + "loss": 0.1011, + "step": 3380, + "task_loss": 0.18895608186721802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785330591730785, + "compression_loss": 0.0, + "distillation_loss": 0.10083561390638351, + "epoch": 3.21, + "learning_rate": 2.8267116297043294e-05, + "loss": 0.1137, + "step": 3381, + "task_loss": 0.22926008701324463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785458244497472, + "compression_loss": 0.0, + "distillation_loss": 0.020816469565033913, + "epoch": 3.21, + "learning_rate": 2.8256552134407993e-05, + "loss": 0.0192, + "step": 3382, + "task_loss": 0.004540523514151573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785585846648471, + "compression_loss": 0.0, + "distillation_loss": 0.05450872331857681, + "epoch": 3.21, + "learning_rate": 2.8245987380204313e-05, + "loss": 0.0556, + "step": 3383, + "task_loss": 0.0653524175286293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785713398193818, + "compression_loss": 0.0, + "distillation_loss": 0.05758683755993843, + "epoch": 3.21, + "learning_rate": 2.8235422036351382e-05, + "loss": 0.0611, + "step": 3384, + "task_loss": 0.09230072051286697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785840899143551, + "compression_loss": 0.0, + "distillation_loss": 0.1324225813150406, + "epoch": 3.21, + "learning_rate": 2.822485610476847e-05, + "loss": 0.1397, + "step": 3385, + "task_loss": 0.2054787278175354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7785968349507707, + "compression_loss": 0.0, + "distillation_loss": 0.10634157061576843, + "epoch": 3.22, + "learning_rate": 2.8214289587374908e-05, + "loss": 0.1103, + "step": 3386, + "task_loss": 0.14617042243480682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786095749296322, + "compression_loss": 0.0, + "distillation_loss": 0.1770530492067337, + "epoch": 3.22, + "learning_rate": 2.8203722486090168e-05, + "loss": 0.1791, + "step": 3387, + "task_loss": 0.19720126688480377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786223098519434, + "compression_loss": 0.0, + "distillation_loss": 0.27764710783958435, + "epoch": 3.22, + "learning_rate": 2.8193154802833803e-05, + "loss": 0.2611, + "step": 3388, + "task_loss": 0.11260776221752167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786350397187077, + "compression_loss": 0.0, + "distillation_loss": 0.052394554018974304, + "epoch": 3.22, + "learning_rate": 2.818258653952549e-05, + "loss": 0.0817, + "step": 3389, + "task_loss": 0.3452882766723633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786477645309291, + "compression_loss": 0.0, + "distillation_loss": 0.023975854739546776, + "epoch": 3.22, + "learning_rate": 2.8172017698085013e-05, + "loss": 0.0298, + "step": 3390, + "task_loss": 0.08264005184173584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786604842896112, + "compression_loss": 0.0, + "distillation_loss": 0.16987451910972595, + "epoch": 3.22, + "learning_rate": 2.816144828043224e-05, + "loss": 0.1559, + "step": 3391, + "task_loss": 0.030469371005892754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786731989957576, + "compression_loss": 0.0, + "distillation_loss": 0.08702673763036728, + "epoch": 3.22, + "learning_rate": 2.8150878288487155e-05, + "loss": 0.0822, + "step": 3392, + "task_loss": 0.03916516155004501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778685908650372, + "compression_loss": 0.0, + "distillation_loss": 0.030004359781742096, + "epoch": 3.22, + "learning_rate": 2.8140307724169857e-05, + "loss": 0.0351, + "step": 3393, + "task_loss": 0.08116083592176437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7786986132544582, + "compression_loss": 0.0, + "distillation_loss": 0.18179570138454437, + "epoch": 3.22, + "learning_rate": 2.812973658940054e-05, + "loss": 0.199, + "step": 3394, + "task_loss": 0.3536381423473358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787113128090197, + "compression_loss": 0.0, + "distillation_loss": 0.1652180403470993, + "epoch": 3.22, + "learning_rate": 2.8119164886099504e-05, + "loss": 0.1694, + "step": 3395, + "task_loss": 0.2065887749195099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787240073150603, + "compression_loss": 0.0, + "distillation_loss": 0.10243000090122223, + "epoch": 3.23, + "learning_rate": 2.8108592616187133e-05, + "loss": 0.1072, + "step": 3396, + "task_loss": 0.15036579966545105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787366967735837, + "compression_loss": 0.0, + "distillation_loss": 0.12697230279445648, + "epoch": 3.23, + "learning_rate": 2.8098019781583944e-05, + "loss": 0.1212, + "step": 3397, + "task_loss": 0.06895315647125244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787493811855934, + "compression_loss": 0.0, + "distillation_loss": 0.046805739402770996, + "epoch": 3.23, + "learning_rate": 2.8087446384210547e-05, + "loss": 0.0473, + "step": 3398, + "task_loss": 0.05141756683588028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787620605520934, + "compression_loss": 0.0, + "distillation_loss": 0.024082593619823456, + "epoch": 3.23, + "learning_rate": 2.8076872425987637e-05, + "loss": 0.0225, + "step": 3399, + "task_loss": 0.00831957720220089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787747348740871, + "compression_loss": 0.0, + "distillation_loss": 0.14285123348236084, + "epoch": 3.23, + "learning_rate": 2.8066297908836043e-05, + "loss": 0.1427, + "step": 3400, + "task_loss": 0.14182282984256744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7787874041525783, + "compression_loss": 0.0, + "distillation_loss": 0.08242295682430267, + "epoch": 3.23, + "learning_rate": 2.8055722834676658e-05, + "loss": 0.0958, + "step": 3401, + "task_loss": 0.21660152077674866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788000683885706, + "compression_loss": 0.0, + "distillation_loss": 0.038724031299352646, + "epoch": 3.23, + "learning_rate": 2.804514720543051e-05, + "loss": 0.0355, + "step": 3402, + "task_loss": 0.006068244576454163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788127275830679, + "compression_loss": 0.0, + "distillation_loss": 0.08571316301822662, + "epoch": 3.23, + "learning_rate": 2.80345710230187e-05, + "loss": 0.0914, + "step": 3403, + "task_loss": 0.14219190180301666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788253817370736, + "compression_loss": 0.0, + "distillation_loss": 0.07485407590866089, + "epoch": 3.23, + "learning_rate": 2.802399428936246e-05, + "loss": 0.0825, + "step": 3404, + "task_loss": 0.15130427479743958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788380308515915, + "compression_loss": 0.0, + "distillation_loss": 0.09159151464700699, + "epoch": 3.23, + "learning_rate": 2.8013417006383076e-05, + "loss": 0.09, + "step": 3405, + "task_loss": 0.07594650983810425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788506749276254, + "compression_loss": 0.0, + "distillation_loss": 0.0715874582529068, + "epoch": 3.23, + "learning_rate": 2.8002839176001987e-05, + "loss": 0.0843, + "step": 3406, + "task_loss": 0.19857263565063477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788633139661788, + "compression_loss": 0.0, + "distillation_loss": 0.13416993618011475, + "epoch": 3.24, + "learning_rate": 2.799226080014071e-05, + "loss": 0.1421, + "step": 3407, + "task_loss": 0.21369251608848572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7788759479682554, + "compression_loss": 0.0, + "distillation_loss": 0.09951282292604446, + "epoch": 3.24, + "learning_rate": 2.7981681880720838e-05, + "loss": 0.0955, + "step": 3408, + "task_loss": 0.058943700045347214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778888576934859, + "compression_loss": 0.0, + "distillation_loss": 0.3170393109321594, + "epoch": 3.24, + "learning_rate": 2.7971102419664103e-05, + "loss": 0.3122, + "step": 3409, + "task_loss": 0.2684779763221741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789012008669933, + "compression_loss": 0.0, + "distillation_loss": 0.04012390226125717, + "epoch": 3.24, + "learning_rate": 2.7960522418892288e-05, + "loss": 0.0552, + "step": 3410, + "task_loss": 0.1905803382396698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789138197656618, + "compression_loss": 0.0, + "distillation_loss": 0.08090061694383621, + "epoch": 3.24, + "learning_rate": 2.794994188032733e-05, + "loss": 0.0841, + "step": 3411, + "task_loss": 0.11296429485082626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789264336318683, + "compression_loss": 0.0, + "distillation_loss": 0.08891788125038147, + "epoch": 3.24, + "learning_rate": 2.7939360805891218e-05, + "loss": 0.0852, + "step": 3412, + "task_loss": 0.05205078423023224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789390424666165, + "compression_loss": 0.0, + "distillation_loss": 0.13765235245227814, + "epoch": 3.24, + "learning_rate": 2.7928779197506056e-05, + "loss": 0.1313, + "step": 3413, + "task_loss": 0.07377782464027405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789516462709101, + "compression_loss": 0.0, + "distillation_loss": 0.0779227688908577, + "epoch": 3.24, + "learning_rate": 2.7918197057094054e-05, + "loss": 0.0794, + "step": 3414, + "task_loss": 0.09290515631437302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789642450457527, + "compression_loss": 0.0, + "distillation_loss": 0.15904906392097473, + "epoch": 3.24, + "learning_rate": 2.7907614386577497e-05, + "loss": 0.1513, + "step": 3415, + "task_loss": 0.08202332258224487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.778976838792148, + "compression_loss": 0.0, + "distillation_loss": 0.05591778829693794, + "epoch": 3.24, + "learning_rate": 2.789703118787879e-05, + "loss": 0.066, + "step": 3416, + "task_loss": 0.15640464425086975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7789894275110996, + "compression_loss": 0.0, + "distillation_loss": 0.2858087122440338, + "epoch": 3.25, + "learning_rate": 2.7886447462920412e-05, + "loss": 0.2745, + "step": 3417, + "task_loss": 0.17287422716617584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790020112036113, + "compression_loss": 0.0, + "distillation_loss": 0.05627952516078949, + "epoch": 3.25, + "learning_rate": 2.787586321362495e-05, + "loss": 0.0517, + "step": 3418, + "task_loss": 0.010653091594576836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790145898706868, + "compression_loss": 0.0, + "distillation_loss": 0.15222644805908203, + "epoch": 3.25, + "learning_rate": 2.7865278441915082e-05, + "loss": 0.1488, + "step": 3419, + "task_loss": 0.11802099645137787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790271635133298, + "compression_loss": 0.0, + "distillation_loss": 0.05383795499801636, + "epoch": 3.25, + "learning_rate": 2.785469314971359e-05, + "loss": 0.0516, + "step": 3420, + "task_loss": 0.03138484060764313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779039732132544, + "compression_loss": 0.0, + "distillation_loss": 0.03762197121977806, + "epoch": 3.25, + "learning_rate": 2.7844107338943343e-05, + "loss": 0.047, + "step": 3421, + "task_loss": 0.13129746913909912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790522957293328, + "compression_loss": 0.0, + "distillation_loss": 0.13689181208610535, + "epoch": 3.25, + "learning_rate": 2.7833521011527293e-05, + "loss": 0.1453, + "step": 3422, + "task_loss": 0.221421480178833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790648543047002, + "compression_loss": 0.0, + "distillation_loss": 0.2846853733062744, + "epoch": 3.25, + "learning_rate": 2.782293416938851e-05, + "loss": 0.2751, + "step": 3423, + "task_loss": 0.18841396272182465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790774078596497, + "compression_loss": 0.0, + "distillation_loss": 0.05968084558844566, + "epoch": 3.25, + "learning_rate": 2.7812346814450135e-05, + "loss": 0.078, + "step": 3424, + "task_loss": 0.2424488067626953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7790899563951851, + "compression_loss": 0.0, + "distillation_loss": 0.08280298113822937, + "epoch": 3.25, + "learning_rate": 2.7801758948635414e-05, + "loss": 0.0881, + "step": 3425, + "task_loss": 0.13536496460437775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791024999123101, + "compression_loss": 0.0, + "distillation_loss": 0.11989252269268036, + "epoch": 3.25, + "learning_rate": 2.77911705738677e-05, + "loss": 0.1164, + "step": 3426, + "task_loss": 0.08493325114250183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791150384120282, + "compression_loss": 0.0, + "distillation_loss": 0.1451410949230194, + "epoch": 3.25, + "learning_rate": 2.7780581692070395e-05, + "loss": 0.1492, + "step": 3427, + "task_loss": 0.18578004837036133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791275718953432, + "compression_loss": 0.0, + "distillation_loss": 0.16686482727527618, + "epoch": 3.26, + "learning_rate": 2.7769992305167043e-05, + "loss": 0.1621, + "step": 3428, + "task_loss": 0.11886778473854065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791401003632589, + "compression_loss": 0.0, + "distillation_loss": 0.057650670409202576, + "epoch": 3.26, + "learning_rate": 2.775940241508124e-05, + "loss": 0.057, + "step": 3429, + "task_loss": 0.05127601698040962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791526238167787, + "compression_loss": 0.0, + "distillation_loss": 0.053712643682956696, + "epoch": 3.26, + "learning_rate": 2.774881202373671e-05, + "loss": 0.064, + "step": 3430, + "task_loss": 0.1569092869758606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791651422569066, + "compression_loss": 0.0, + "distillation_loss": 0.04081248864531517, + "epoch": 3.26, + "learning_rate": 2.773822113305723e-05, + "loss": 0.0428, + "step": 3431, + "task_loss": 0.061016641557216644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791776556846461, + "compression_loss": 0.0, + "distillation_loss": 0.06572188436985016, + "epoch": 3.26, + "learning_rate": 2.7727629744966695e-05, + "loss": 0.067, + "step": 3432, + "task_loss": 0.07893720269203186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7791901641010008, + "compression_loss": 0.0, + "distillation_loss": 0.02726941555738449, + "epoch": 3.26, + "learning_rate": 2.7717037861389082e-05, + "loss": 0.0256, + "step": 3433, + "task_loss": 0.010105656459927559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792026675069746, + "compression_loss": 0.0, + "distillation_loss": 0.030473362654447556, + "epoch": 3.26, + "learning_rate": 2.7706445484248454e-05, + "loss": 0.0291, + "step": 3434, + "task_loss": 0.017233194783329964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792151659035711, + "compression_loss": 0.0, + "distillation_loss": 0.2087365984916687, + "epoch": 3.26, + "learning_rate": 2.769585261546897e-05, + "loss": 0.2018, + "step": 3435, + "task_loss": 0.13944987952709198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792276592917939, + "compression_loss": 0.0, + "distillation_loss": 0.04427679255604744, + "epoch": 3.26, + "learning_rate": 2.768525925697487e-05, + "loss": 0.0431, + "step": 3436, + "task_loss": 0.03255880996584892 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792401476726467, + "compression_loss": 0.0, + "distillation_loss": 0.10959358513355255, + "epoch": 3.26, + "learning_rate": 2.76746654106905e-05, + "loss": 0.1043, + "step": 3437, + "task_loss": 0.05700678750872612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792526310471333, + "compression_loss": 0.0, + "distillation_loss": 0.18829983472824097, + "epoch": 3.26, + "learning_rate": 2.7664071078540282e-05, + "loss": 0.1821, + "step": 3438, + "task_loss": 0.12653696537017822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792651094162573, + "compression_loss": 0.0, + "distillation_loss": 0.12829965353012085, + "epoch": 3.27, + "learning_rate": 2.7653476262448713e-05, + "loss": 0.1339, + "step": 3439, + "task_loss": 0.18381281197071075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792775827810223, + "compression_loss": 0.0, + "distillation_loss": 0.06626725196838379, + "epoch": 3.27, + "learning_rate": 2.76428809643404e-05, + "loss": 0.062, + "step": 3440, + "task_loss": 0.023818595334887505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7792900511424321, + "compression_loss": 0.0, + "distillation_loss": 0.03690072149038315, + "epoch": 3.27, + "learning_rate": 2.763228518614004e-05, + "loss": 0.0498, + "step": 3441, + "task_loss": 0.16629740595817566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793025145014904, + "compression_loss": 0.0, + "distillation_loss": 0.04804535210132599, + "epoch": 3.27, + "learning_rate": 2.7621688929772393e-05, + "loss": 0.0533, + "step": 3442, + "task_loss": 0.10066729038953781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793149728592008, + "compression_loss": 0.0, + "distillation_loss": 0.1166885569691658, + "epoch": 3.27, + "learning_rate": 2.761109219716233e-05, + "loss": 0.1143, + "step": 3443, + "task_loss": 0.09277547895908356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793274262165669, + "compression_loss": 0.0, + "distillation_loss": 0.10858124494552612, + "epoch": 3.27, + "learning_rate": 2.760049499023479e-05, + "loss": 0.113, + "step": 3444, + "task_loss": 0.15251390635967255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793398745745926, + "compression_loss": 0.0, + "distillation_loss": 0.03859622776508331, + "epoch": 3.27, + "learning_rate": 2.7589897310914814e-05, + "loss": 0.0443, + "step": 3445, + "task_loss": 0.09562104940414429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793523179342815, + "compression_loss": 0.0, + "distillation_loss": 0.030183183029294014, + "epoch": 3.27, + "learning_rate": 2.7579299161127513e-05, + "loss": 0.0287, + "step": 3446, + "task_loss": 0.01539078913629055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793647562966373, + "compression_loss": 0.0, + "distillation_loss": 0.06578610837459564, + "epoch": 3.27, + "learning_rate": 2.756870054279811e-05, + "loss": 0.0839, + "step": 3447, + "task_loss": 0.24706564843654633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7793771896626636, + "compression_loss": 0.0, + "distillation_loss": 0.2527296543121338, + "epoch": 3.27, + "learning_rate": 2.755810145785187e-05, + "loss": 0.2617, + "step": 3448, + "task_loss": 0.3425305485725403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779389618033364, + "compression_loss": 0.0, + "distillation_loss": 0.10953805595636368, + "epoch": 3.28, + "learning_rate": 2.754750190821418e-05, + "loss": 0.1019, + "step": 3449, + "task_loss": 0.03293545916676521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794020414097425, + "compression_loss": 0.0, + "distillation_loss": 0.2223641574382782, + "epoch": 3.28, + "learning_rate": 2.753690189581051e-05, + "loss": 0.2129, + "step": 3450, + "task_loss": 0.12748362123966217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794144597928024, + "compression_loss": 0.0, + "distillation_loss": 0.07554468512535095, + "epoch": 3.28, + "learning_rate": 2.752630142256638e-05, + "loss": 0.0726, + "step": 3451, + "task_loss": 0.045700106769800186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794268731835476, + "compression_loss": 0.0, + "distillation_loss": 0.0460401251912117, + "epoch": 3.28, + "learning_rate": 2.7515700490407443e-05, + "loss": 0.0571, + "step": 3452, + "task_loss": 0.15684375166893005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794392815829819, + "compression_loss": 0.0, + "distillation_loss": 0.07692290842533112, + "epoch": 3.28, + "learning_rate": 2.7505099101259386e-05, + "loss": 0.0725, + "step": 3453, + "task_loss": 0.03274589031934738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794516849921087, + "compression_loss": 0.0, + "distillation_loss": 0.1473102867603302, + "epoch": 3.28, + "learning_rate": 2.749449725704802e-05, + "loss": 0.1434, + "step": 3454, + "task_loss": 0.1077522411942482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794640834119319, + "compression_loss": 0.0, + "distillation_loss": 0.10021187365055084, + "epoch": 3.28, + "learning_rate": 2.748389495969921e-05, + "loss": 0.1095, + "step": 3455, + "task_loss": 0.19332432746887207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779476476843455, + "compression_loss": 0.0, + "distillation_loss": 0.03207477182149887, + "epoch": 3.28, + "learning_rate": 2.747329221113891e-05, + "loss": 0.0303, + "step": 3456, + "task_loss": 0.01403326727449894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7794888652876819, + "compression_loss": 0.0, + "distillation_loss": 0.1490982323884964, + "epoch": 3.28, + "learning_rate": 2.7462689013293176e-05, + "loss": 0.1534, + "step": 3457, + "task_loss": 0.1916988492012024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795012487456161, + "compression_loss": 0.0, + "distillation_loss": 0.07416212558746338, + "epoch": 3.28, + "learning_rate": 2.745208536808812e-05, + "loss": 0.0814, + "step": 3458, + "task_loss": 0.14629532396793365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795136272182613, + "compression_loss": 0.0, + "distillation_loss": 0.3036563992500305, + "epoch": 3.28, + "learning_rate": 2.7441481277449954e-05, + "loss": 0.2975, + "step": 3459, + "task_loss": 0.24229754507541656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795260007066213, + "compression_loss": 0.0, + "distillation_loss": 0.05779052898287773, + "epoch": 3.29, + "learning_rate": 2.743087674330495e-05, + "loss": 0.0536, + "step": 3460, + "task_loss": 0.016016999259591103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795383692116996, + "compression_loss": 0.0, + "distillation_loss": 0.05794607847929001, + "epoch": 3.29, + "learning_rate": 2.742027176757948e-05, + "loss": 0.054, + "step": 3461, + "task_loss": 0.018953843042254448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795507327345002, + "compression_loss": 0.0, + "distillation_loss": 0.13176500797271729, + "epoch": 3.29, + "learning_rate": 2.7409666352199986e-05, + "loss": 0.1273, + "step": 3462, + "task_loss": 0.08700872212648392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795630912760264, + "compression_loss": 0.0, + "distillation_loss": 0.03117290511727333, + "epoch": 3.29, + "learning_rate": 2.7399060499092992e-05, + "loss": 0.0383, + "step": 3463, + "task_loss": 0.10199519991874695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779575444837282, + "compression_loss": 0.0, + "distillation_loss": 0.03322270140051842, + "epoch": 3.29, + "learning_rate": 2.7388454210185115e-05, + "loss": 0.0369, + "step": 3464, + "task_loss": 0.0694967657327652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7795877934192709, + "compression_loss": 0.0, + "distillation_loss": 0.06944756209850311, + "epoch": 3.29, + "learning_rate": 2.7377847487403018e-05, + "loss": 0.0634, + "step": 3465, + "task_loss": 0.008718544617295265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7796001370229966, + "compression_loss": 0.0, + "distillation_loss": 0.07945192605257034, + "epoch": 3.29, + "learning_rate": 2.736724033267347e-05, + "loss": 0.0837, + "step": 3466, + "task_loss": 0.12163371592760086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7796124756494627, + "compression_loss": 0.0, + "distillation_loss": 0.04697978496551514, + "epoch": 3.29, + "learning_rate": 2.7356632747923322e-05, + "loss": 0.0436, + "step": 3467, + "task_loss": 0.013647403568029404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779624809299673, + "compression_loss": 0.0, + "distillation_loss": 0.10399874299764633, + "epoch": 3.29, + "learning_rate": 2.7346024735079486e-05, + "loss": 0.1006, + "step": 3468, + "task_loss": 0.06967251002788544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7796371379746312, + "compression_loss": 0.0, + "distillation_loss": 0.20231911540031433, + "epoch": 3.29, + "learning_rate": 2.7335416296068962e-05, + "loss": 0.204, + "step": 3469, + "task_loss": 0.21951937675476074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779649461675341, + "compression_loss": 0.0, + "distillation_loss": 0.09952305257320404, + "epoch": 3.3, + "learning_rate": 2.7324807432818805e-05, + "loss": 0.093, + "step": 3470, + "task_loss": 0.033969540148973465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7796617804028059, + "compression_loss": 0.0, + "distillation_loss": 0.2023647427558899, + "epoch": 3.3, + "learning_rate": 2.731419814725619e-05, + "loss": 0.1988, + "step": 3471, + "task_loss": 0.16686657071113586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7796740941580298, + "compression_loss": 0.0, + "distillation_loss": 0.019200827926397324, + "epoch": 3.3, + "learning_rate": 2.730358844130834e-05, + "loss": 0.0176, + "step": 3472, + "task_loss": 0.0035965926945209503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7796864029420163, + "compression_loss": 0.0, + "distillation_loss": 0.10850808024406433, + "epoch": 3.3, + "learning_rate": 2.729297831690255e-05, + "loss": 0.1036, + "step": 3473, + "task_loss": 0.059668056666851044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779698706755769, + "compression_loss": 0.0, + "distillation_loss": 0.1171179711818695, + "epoch": 3.3, + "learning_rate": 2.728236777596621e-05, + "loss": 0.1114, + "step": 3474, + "task_loss": 0.0599634051322937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797110056002917, + "compression_loss": 0.0, + "distillation_loss": 0.023795567452907562, + "epoch": 3.3, + "learning_rate": 2.7271756820426763e-05, + "loss": 0.0218, + "step": 3475, + "task_loss": 0.003354804590344429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797232994765881, + "compression_loss": 0.0, + "distillation_loss": 0.05289144068956375, + "epoch": 3.3, + "learning_rate": 2.7261145452211763e-05, + "loss": 0.0625, + "step": 3476, + "task_loss": 0.14865252375602722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797355883856617, + "compression_loss": 0.0, + "distillation_loss": 0.26715028285980225, + "epoch": 3.3, + "learning_rate": 2.725053367324879e-05, + "loss": 0.271, + "step": 3477, + "task_loss": 0.3059850335121155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797478723285164, + "compression_loss": 0.0, + "distillation_loss": 0.03925115987658501, + "epoch": 3.3, + "learning_rate": 2.723992148546554e-05, + "loss": 0.0448, + "step": 3478, + "task_loss": 0.0945422425866127 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797601513061558, + "compression_loss": 0.0, + "distillation_loss": 0.10821853578090668, + "epoch": 3.3, + "learning_rate": 2.7229308890789767e-05, + "loss": 0.1106, + "step": 3479, + "task_loss": 0.13163337111473083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797724253195835, + "compression_loss": 0.0, + "distillation_loss": 0.026758499443531036, + "epoch": 3.3, + "learning_rate": 2.7218695891149293e-05, + "loss": 0.0359, + "step": 3480, + "task_loss": 0.11779153347015381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797846943698032, + "compression_loss": 0.0, + "distillation_loss": 0.04860454425215721, + "epoch": 3.31, + "learning_rate": 2.720808248847203e-05, + "loss": 0.0449, + "step": 3481, + "task_loss": 0.011549010872840881 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7797969584578188, + "compression_loss": 0.0, + "distillation_loss": 0.11690667271614075, + "epoch": 3.31, + "learning_rate": 2.719746868468595e-05, + "loss": 0.1101, + "step": 3482, + "task_loss": 0.04849759489297867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798092175846337, + "compression_loss": 0.0, + "distillation_loss": 0.13424092531204224, + "epoch": 3.31, + "learning_rate": 2.7186854481719092e-05, + "loss": 0.1333, + "step": 3483, + "task_loss": 0.12473595142364502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798214717512517, + "compression_loss": 0.0, + "distillation_loss": 0.07486072927713394, + "epoch": 3.31, + "learning_rate": 2.7176239881499595e-05, + "loss": 0.0961, + "step": 3484, + "task_loss": 0.28720682859420776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798337209586765, + "compression_loss": 0.0, + "distillation_loss": 0.1800866276025772, + "epoch": 3.31, + "learning_rate": 2.716562488595563e-05, + "loss": 0.1734, + "step": 3485, + "task_loss": 0.11275843530893326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798459652079118, + "compression_loss": 0.0, + "distillation_loss": 0.045273929834365845, + "epoch": 3.31, + "learning_rate": 2.715500949701549e-05, + "loss": 0.0426, + "step": 3486, + "task_loss": 0.0184002872556448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798582044999612, + "compression_loss": 0.0, + "distillation_loss": 0.07528147101402283, + "epoch": 3.31, + "learning_rate": 2.7144393716607486e-05, + "loss": 0.0723, + "step": 3487, + "task_loss": 0.04534187912940979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798704388358284, + "compression_loss": 0.0, + "distillation_loss": 0.1249753087759018, + "epoch": 3.31, + "learning_rate": 2.713377754666004e-05, + "loss": 0.1209, + "step": 3488, + "task_loss": 0.08464540541172028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7798826682165172, + "compression_loss": 0.0, + "distillation_loss": 0.2507629990577698, + "epoch": 3.31, + "learning_rate": 2.712316098910162e-05, + "loss": 0.2497, + "step": 3489, + "task_loss": 0.23983043432235718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.779894892643031, + "compression_loss": 0.0, + "distillation_loss": 0.017407327890396118, + "epoch": 3.31, + "learning_rate": 2.711254404586079e-05, + "loss": 0.0162, + "step": 3490, + "task_loss": 0.004980321973562241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799071121163739, + "compression_loss": 0.0, + "distillation_loss": 0.16080042719841003, + "epoch": 3.32, + "learning_rate": 2.7101926718866156e-05, + "loss": 0.1569, + "step": 3491, + "task_loss": 0.12228196114301682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799193266375491, + "compression_loss": 0.0, + "distillation_loss": 0.20948654413223267, + "epoch": 3.32, + "learning_rate": 2.7091309010046408e-05, + "loss": 0.2093, + "step": 3492, + "task_loss": 0.2079431563615799 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799315362075607, + "compression_loss": 0.0, + "distillation_loss": 0.11191666126251221, + "epoch": 3.32, + "learning_rate": 2.708069092133031e-05, + "loss": 0.1132, + "step": 3493, + "task_loss": 0.12430918961763382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799437408274122, + "compression_loss": 0.0, + "distillation_loss": 0.11181183904409409, + "epoch": 3.32, + "learning_rate": 2.7070072454646683e-05, + "loss": 0.106, + "step": 3494, + "task_loss": 0.05379234626889229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799559404981073, + "compression_loss": 0.0, + "distillation_loss": 0.09605347365140915, + "epoch": 3.32, + "learning_rate": 2.7059453611924433e-05, + "loss": 0.0923, + "step": 3495, + "task_loss": 0.058432966470718384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799681352206498, + "compression_loss": 0.0, + "distillation_loss": 0.06996072828769684, + "epoch": 3.32, + "learning_rate": 2.7048834395092505e-05, + "loss": 0.0752, + "step": 3496, + "task_loss": 0.12205319851636887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799803249960431, + "compression_loss": 0.0, + "distillation_loss": 0.09135409444570541, + "epoch": 3.32, + "learning_rate": 2.7038214806079948e-05, + "loss": 0.0947, + "step": 3497, + "task_loss": 0.12439997494220734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7799925098252911, + "compression_loss": 0.0, + "distillation_loss": 0.04232946038246155, + "epoch": 3.32, + "learning_rate": 2.702759484681585e-05, + "loss": 0.0455, + "step": 3498, + "task_loss": 0.07386565208435059 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800046897093974, + "compression_loss": 0.0, + "distillation_loss": 0.06012481451034546, + "epoch": 3.32, + "learning_rate": 2.701697451922939e-05, + "loss": 0.0557, + "step": 3499, + "task_loss": 0.015828527510166168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800168646493657, + "compression_loss": 0.0, + "distillation_loss": 0.13709309697151184, + "epoch": 3.32, + "learning_rate": 2.7006353825249792e-05, + "loss": 0.1315, + "step": 3500, + "task_loss": 0.08080706745386124 + }, + { + "epoch": 3.32, + "eval_accuracy": 0.8967889908256881, + "eval_loss": 0.41444137692451477, + "eval_runtime": 18.5229, + "eval_samples_per_second": 47.077, + "eval_steps_per_second": 5.885, + "step": 3500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800290346461998, + "compression_loss": 0.0, + "distillation_loss": 0.05133210867643356, + "epoch": 3.32, + "learning_rate": 2.6995732766806354e-05, + "loss": 0.0478, + "step": 3501, + "task_loss": 0.015585673972964287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800411997009031, + "compression_loss": 0.0, + "distillation_loss": 0.04098585247993469, + "epoch": 3.33, + "learning_rate": 2.6985111345828452e-05, + "loss": 0.0458, + "step": 3502, + "task_loss": 0.0887080505490303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800533598144795, + "compression_loss": 0.0, + "distillation_loss": 0.07880129665136337, + "epoch": 3.33, + "learning_rate": 2.6974489564245513e-05, + "loss": 0.0739, + "step": 3503, + "task_loss": 0.030195550993084908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800655149879328, + "compression_loss": 0.0, + "distillation_loss": 0.07193966954946518, + "epoch": 3.33, + "learning_rate": 2.6963867423987032e-05, + "loss": 0.0733, + "step": 3504, + "task_loss": 0.08535440266132355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7800776652222663, + "compression_loss": 0.0, + "distillation_loss": 0.23703370988368988, + "epoch": 3.33, + "learning_rate": 2.695324492698258e-05, + "loss": 0.2278, + "step": 3505, + "task_loss": 0.14506091177463531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780089810518484, + "compression_loss": 0.0, + "distillation_loss": 0.27318769693374634, + "epoch": 3.33, + "learning_rate": 2.694262207516178e-05, + "loss": 0.2656, + "step": 3506, + "task_loss": 0.1974327713251114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801019508775895, + "compression_loss": 0.0, + "distillation_loss": 0.03283765912055969, + "epoch": 3.33, + "learning_rate": 2.6931998870454327e-05, + "loss": 0.0311, + "step": 3507, + "task_loss": 0.015567878261208534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801140863005865, + "compression_loss": 0.0, + "distillation_loss": 0.08824481815099716, + "epoch": 3.33, + "learning_rate": 2.692137531478997e-05, + "loss": 0.0837, + "step": 3508, + "task_loss": 0.042351722717285156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801262167884786, + "compression_loss": 0.0, + "distillation_loss": 0.10943618416786194, + "epoch": 3.33, + "learning_rate": 2.6910751410098532e-05, + "loss": 0.1129, + "step": 3509, + "task_loss": 0.14431238174438477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801383423422695, + "compression_loss": 0.0, + "distillation_loss": 0.10514090955257416, + "epoch": 3.33, + "learning_rate": 2.6900127158309903e-05, + "loss": 0.1055, + "step": 3510, + "task_loss": 0.10892733186483383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780150462962963, + "compression_loss": 0.0, + "distillation_loss": 0.1573885828256607, + "epoch": 3.33, + "learning_rate": 2.688950256135402e-05, + "loss": 0.1667, + "step": 3511, + "task_loss": 0.2509816586971283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801625786515627, + "compression_loss": 0.0, + "distillation_loss": 0.0467422753572464, + "epoch": 3.34, + "learning_rate": 2.6878877621160904e-05, + "loss": 0.0526, + "step": 3512, + "task_loss": 0.10526955127716064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801746894090722, + "compression_loss": 0.0, + "distillation_loss": 0.0356249138712883, + "epoch": 3.34, + "learning_rate": 2.686825233966061e-05, + "loss": 0.0428, + "step": 3513, + "task_loss": 0.10746340453624725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801867952364953, + "compression_loss": 0.0, + "distillation_loss": 0.07927364856004715, + "epoch": 3.34, + "learning_rate": 2.6857626718783285e-05, + "loss": 0.0928, + "step": 3514, + "task_loss": 0.21453584730625153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7801988961348355, + "compression_loss": 0.0, + "distillation_loss": 0.21596963703632355, + "epoch": 3.34, + "learning_rate": 2.6847000760459118e-05, + "loss": 0.2164, + "step": 3515, + "task_loss": 0.22040767967700958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802109921050968, + "compression_loss": 0.0, + "distillation_loss": 0.04428340867161751, + "epoch": 3.34, + "learning_rate": 2.683637446661837e-05, + "loss": 0.0504, + "step": 3516, + "task_loss": 0.10571881383657455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802230831482826, + "compression_loss": 0.0, + "distillation_loss": 0.020135464146733284, + "epoch": 3.34, + "learning_rate": 2.6825747839191362e-05, + "loss": 0.0284, + "step": 3517, + "task_loss": 0.10235545039176941 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802351692653968, + "compression_loss": 0.0, + "distillation_loss": 0.09116624295711517, + "epoch": 3.34, + "learning_rate": 2.681512088010845e-05, + "loss": 0.0861, + "step": 3518, + "task_loss": 0.04042452201247215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802472504574429, + "compression_loss": 0.0, + "distillation_loss": 0.21908923983573914, + "epoch": 3.34, + "learning_rate": 2.6804493591300105e-05, + "loss": 0.2111, + "step": 3519, + "task_loss": 0.13956592977046967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802593267254246, + "compression_loss": 0.0, + "distillation_loss": 0.09275214374065399, + "epoch": 3.34, + "learning_rate": 2.6793865974696803e-05, + "loss": 0.0882, + "step": 3520, + "task_loss": 0.046760689467191696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802713980703457, + "compression_loss": 0.0, + "distillation_loss": 0.031631097197532654, + "epoch": 3.34, + "learning_rate": 2.67832380322291e-05, + "loss": 0.0295, + "step": 3521, + "task_loss": 0.01028413511812687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802834644932098, + "compression_loss": 0.0, + "distillation_loss": 0.09307868778705597, + "epoch": 3.34, + "learning_rate": 2.6772609765827627e-05, + "loss": 0.1006, + "step": 3522, + "task_loss": 0.16859915852546692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7802955259950206, + "compression_loss": 0.0, + "distillation_loss": 0.07208751142024994, + "epoch": 3.35, + "learning_rate": 2.6761981177423052e-05, + "loss": 0.0748, + "step": 3523, + "task_loss": 0.09921170026063919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7803075825767818, + "compression_loss": 0.0, + "distillation_loss": 0.05329529941082001, + "epoch": 3.35, + "learning_rate": 2.6751352268946118e-05, + "loss": 0.0492, + "step": 3524, + "task_loss": 0.012433096766471863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7803196342394971, + "compression_loss": 0.0, + "distillation_loss": 0.13451752066612244, + "epoch": 3.35, + "learning_rate": 2.6740723042327598e-05, + "loss": 0.1327, + "step": 3525, + "task_loss": 0.11605449765920639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78033168098417, + "compression_loss": 0.0, + "distillation_loss": 0.08334813266992569, + "epoch": 3.35, + "learning_rate": 2.673009349949836e-05, + "loss": 0.1004, + "step": 3526, + "task_loss": 0.2539750039577484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7803437228118045, + "compression_loss": 0.0, + "distillation_loss": 0.11693807691335678, + "epoch": 3.35, + "learning_rate": 2.6719463642389302e-05, + "loss": 0.1105, + "step": 3527, + "task_loss": 0.05235512554645538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780355759723404, + "compression_loss": 0.0, + "distillation_loss": 0.04535358399152756, + "epoch": 3.35, + "learning_rate": 2.6708833472931394e-05, + "loss": 0.0491, + "step": 3528, + "task_loss": 0.08303047716617584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7803677917199723, + "compression_loss": 0.0, + "distillation_loss": 0.058416105806827545, + "epoch": 3.35, + "learning_rate": 2.669820299305566e-05, + "loss": 0.0618, + "step": 3529, + "task_loss": 0.09264393150806427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7803798188025132, + "compression_loss": 0.0, + "distillation_loss": 0.0770866721868515, + "epoch": 3.35, + "learning_rate": 2.6687572204693174e-05, + "loss": 0.0906, + "step": 3530, + "task_loss": 0.2119910567998886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7803918409720301, + "compression_loss": 0.0, + "distillation_loss": 0.042178817093372345, + "epoch": 3.35, + "learning_rate": 2.667694110977506e-05, + "loss": 0.0418, + "step": 3531, + "task_loss": 0.03879820555448532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7804038582295268, + "compression_loss": 0.0, + "distillation_loss": 0.11523336172103882, + "epoch": 3.35, + "learning_rate": 2.6666309710232522e-05, + "loss": 0.1115, + "step": 3532, + "task_loss": 0.078341543674469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7804158705760071, + "compression_loss": 0.0, + "distillation_loss": 0.1296311616897583, + "epoch": 3.36, + "learning_rate": 2.6655678007996804e-05, + "loss": 0.1225, + "step": 3533, + "task_loss": 0.057877760380506516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7804278780124746, + "compression_loss": 0.0, + "distillation_loss": 0.04751583933830261, + "epoch": 3.36, + "learning_rate": 2.66450460049992e-05, + "loss": 0.0501, + "step": 3534, + "task_loss": 0.07348724454641342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7804398805399331, + "compression_loss": 0.0, + "distillation_loss": 0.032740626484155655, + "epoch": 3.36, + "learning_rate": 2.6634413703171058e-05, + "loss": 0.0306, + "step": 3535, + "task_loss": 0.011288370937108994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780451878159386, + "compression_loss": 0.0, + "distillation_loss": 0.02693828195333481, + "epoch": 3.36, + "learning_rate": 2.6623781104443806e-05, + "loss": 0.0299, + "step": 3536, + "task_loss": 0.05642160773277283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7804638708718372, + "compression_loss": 0.0, + "distillation_loss": 0.1141064316034317, + "epoch": 3.36, + "learning_rate": 2.6613148210748894e-05, + "loss": 0.1129, + "step": 3537, + "task_loss": 0.10202029347419739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7804758586782903, + "compression_loss": 0.0, + "distillation_loss": 0.05455036461353302, + "epoch": 3.36, + "learning_rate": 2.6602515024017842e-05, + "loss": 0.0595, + "step": 3538, + "task_loss": 0.1041443794965744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780487841579749, + "compression_loss": 0.0, + "distillation_loss": 0.03301164135336876, + "epoch": 3.36, + "learning_rate": 2.6591881546182216e-05, + "loss": 0.0302, + "step": 3539, + "task_loss": 0.005180429667234421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780499819577217, + "compression_loss": 0.0, + "distillation_loss": 0.06954570859670639, + "epoch": 3.36, + "learning_rate": 2.6581247779173635e-05, + "loss": 0.069, + "step": 3540, + "task_loss": 0.06367438286542892 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780511792671698, + "compression_loss": 0.0, + "distillation_loss": 0.02071313187479973, + "epoch": 3.36, + "learning_rate": 2.6570613724923788e-05, + "loss": 0.019, + "step": 3541, + "task_loss": 0.003429897129535675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805237608641957, + "compression_loss": 0.0, + "distillation_loss": 0.01836605742573738, + "epoch": 3.36, + "learning_rate": 2.655997938536439e-05, + "loss": 0.017, + "step": 3542, + "task_loss": 0.004780923947691917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805357241557137, + "compression_loss": 0.0, + "distillation_loss": 0.031529366970062256, + "epoch": 3.36, + "learning_rate": 2.654934476242723e-05, + "loss": 0.0292, + "step": 3543, + "task_loss": 0.008400822058320045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805476825472557, + "compression_loss": 0.0, + "distillation_loss": 0.026692230254411697, + "epoch": 3.37, + "learning_rate": 2.653870985804412e-05, + "loss": 0.0248, + "step": 3544, + "task_loss": 0.007382074370980263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805596360398255, + "compression_loss": 0.0, + "distillation_loss": 0.036252401769161224, + "epoch": 3.37, + "learning_rate": 2.6528074674146963e-05, + "loss": 0.0462, + "step": 3545, + "task_loss": 0.13550357520580292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805715846344266, + "compression_loss": 0.0, + "distillation_loss": 0.031404148787260056, + "epoch": 3.37, + "learning_rate": 2.6517439212667677e-05, + "loss": 0.0292, + "step": 3546, + "task_loss": 0.009732730686664581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805835283320628, + "compression_loss": 0.0, + "distillation_loss": 0.08225059509277344, + "epoch": 3.37, + "learning_rate": 2.6506803475538256e-05, + "loss": 0.0909, + "step": 3547, + "task_loss": 0.16853067278862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7805954671337377, + "compression_loss": 0.0, + "distillation_loss": 0.028768369928002357, + "epoch": 3.37, + "learning_rate": 2.649616746469072e-05, + "loss": 0.0268, + "step": 3548, + "task_loss": 0.009422983974218369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806074010404551, + "compression_loss": 0.0, + "distillation_loss": 0.11904192715883255, + "epoch": 3.37, + "learning_rate": 2.648553118205716e-05, + "loss": 0.1248, + "step": 3549, + "task_loss": 0.17703823745250702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806193300532185, + "compression_loss": 0.0, + "distillation_loss": 0.03019685298204422, + "epoch": 3.37, + "learning_rate": 2.6474894629569713e-05, + "loss": 0.0293, + "step": 3550, + "task_loss": 0.02082175202667713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806312541730318, + "compression_loss": 0.0, + "distillation_loss": 0.08796247839927673, + "epoch": 3.37, + "learning_rate": 2.6464257809160548e-05, + "loss": 0.0828, + "step": 3551, + "task_loss": 0.03604867681860924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806431734008986, + "compression_loss": 0.0, + "distillation_loss": 0.03714694827795029, + "epoch": 3.37, + "learning_rate": 2.6453620722761896e-05, + "loss": 0.0501, + "step": 3552, + "task_loss": 0.16653135418891907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806550877378224, + "compression_loss": 0.0, + "distillation_loss": 0.15399909019470215, + "epoch": 3.37, + "learning_rate": 2.6442983372306045e-05, + "loss": 0.1543, + "step": 3553, + "task_loss": 0.15673311054706573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806669971848073, + "compression_loss": 0.0, + "distillation_loss": 0.23555459082126617, + "epoch": 3.38, + "learning_rate": 2.643234575972531e-05, + "loss": 0.2352, + "step": 3554, + "task_loss": 0.2320120632648468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7806789017428565, + "compression_loss": 0.0, + "distillation_loss": 0.0971142053604126, + "epoch": 3.38, + "learning_rate": 2.642170788695208e-05, + "loss": 0.0907, + "step": 3555, + "task_loss": 0.03279740735888481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780690801412974, + "compression_loss": 0.0, + "distillation_loss": 0.05912064015865326, + "epoch": 3.38, + "learning_rate": 2.6411069755918755e-05, + "loss": 0.0546, + "step": 3556, + "task_loss": 0.013454478234052658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807026961961634, + "compression_loss": 0.0, + "distillation_loss": 0.15938124060630798, + "epoch": 3.38, + "learning_rate": 2.6400431368557815e-05, + "loss": 0.1581, + "step": 3557, + "task_loss": 0.14631018042564392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807145860934284, + "compression_loss": 0.0, + "distillation_loss": 0.08248989284038544, + "epoch": 3.38, + "learning_rate": 2.6389792726801778e-05, + "loss": 0.0976, + "step": 3558, + "task_loss": 0.23338665068149567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807264711057725, + "compression_loss": 0.0, + "distillation_loss": 0.03553071618080139, + "epoch": 3.38, + "learning_rate": 2.6379153832583186e-05, + "loss": 0.0324, + "step": 3559, + "task_loss": 0.004168994724750519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807383512341997, + "compression_loss": 0.0, + "distillation_loss": 0.018167581409215927, + "epoch": 3.38, + "learning_rate": 2.6368514687834672e-05, + "loss": 0.0167, + "step": 3560, + "task_loss": 0.003941915929317474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807502264797134, + "compression_loss": 0.0, + "distillation_loss": 0.09068135917186737, + "epoch": 3.38, + "learning_rate": 2.6357875294488865e-05, + "loss": 0.0855, + "step": 3561, + "task_loss": 0.03913940489292145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807620968433175, + "compression_loss": 0.0, + "distillation_loss": 0.06539643555879593, + "epoch": 3.38, + "learning_rate": 2.6347235654478482e-05, + "loss": 0.0653, + "step": 3562, + "task_loss": 0.06428472697734833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807739623260155, + "compression_loss": 0.0, + "distillation_loss": 0.09735129773616791, + "epoch": 3.38, + "learning_rate": 2.6336595769736245e-05, + "loss": 0.0938, + "step": 3563, + "task_loss": 0.06144891679286957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807858229288113, + "compression_loss": 0.0, + "distillation_loss": 0.1435181051492691, + "epoch": 3.38, + "learning_rate": 2.6325955642194948e-05, + "loss": 0.1363, + "step": 3564, + "task_loss": 0.07164686918258667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7807976786527082, + "compression_loss": 0.0, + "distillation_loss": 0.05075103044509888, + "epoch": 3.39, + "learning_rate": 2.6315315273787428e-05, + "loss": 0.0474, + "step": 3565, + "task_loss": 0.01732024922966957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7808095294987103, + "compression_loss": 0.0, + "distillation_loss": 0.2770991325378418, + "epoch": 3.39, + "learning_rate": 2.630467466644655e-05, + "loss": 0.2756, + "step": 3566, + "task_loss": 0.2625621557235718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7808213754678212, + "compression_loss": 0.0, + "distillation_loss": 0.13243553042411804, + "epoch": 3.39, + "learning_rate": 2.629403382210524e-05, + "loss": 0.1435, + "step": 3567, + "task_loss": 0.24289308488368988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7808332165610443, + "compression_loss": 0.0, + "distillation_loss": 0.04136691614985466, + "epoch": 3.39, + "learning_rate": 2.628339274269645e-05, + "loss": 0.0517, + "step": 3568, + "task_loss": 0.14519576728343964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7808450527793835, + "compression_loss": 0.0, + "distillation_loss": 0.014836801216006279, + "epoch": 3.39, + "learning_rate": 2.6272751430153186e-05, + "loss": 0.0174, + "step": 3569, + "task_loss": 0.04046123847365379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7808568841238426, + "compression_loss": 0.0, + "distillation_loss": 0.11791656911373138, + "epoch": 3.39, + "learning_rate": 2.62621098864085e-05, + "loss": 0.1122, + "step": 3570, + "task_loss": 0.06102790683507919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780868710595425, + "compression_loss": 0.0, + "distillation_loss": 0.026029329746961594, + "epoch": 3.39, + "learning_rate": 2.6251468113395465e-05, + "loss": 0.0323, + "step": 3571, + "task_loss": 0.08885947614908218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7808805321951345, + "compression_loss": 0.0, + "distillation_loss": 0.07591719180345535, + "epoch": 3.39, + "learning_rate": 2.6240826113047235e-05, + "loss": 0.0735, + "step": 3572, + "task_loss": 0.05175752192735672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.780892348923975, + "compression_loss": 0.0, + "distillation_loss": 0.030038170516490936, + "epoch": 3.39, + "learning_rate": 2.6230183887296955e-05, + "loss": 0.0315, + "step": 3573, + "task_loss": 0.044650256633758545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809041607829498, + "compression_loss": 0.0, + "distillation_loss": 0.0887594223022461, + "epoch": 3.39, + "learning_rate": 2.6219541438077855e-05, + "loss": 0.0841, + "step": 3574, + "task_loss": 0.04259561002254486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809159677730628, + "compression_loss": 0.0, + "distillation_loss": 0.10874880850315094, + "epoch": 3.4, + "learning_rate": 2.620889876732317e-05, + "loss": 0.1032, + "step": 3575, + "task_loss": 0.05339481681585312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809277698953176, + "compression_loss": 0.0, + "distillation_loss": 0.03946895897388458, + "epoch": 3.4, + "learning_rate": 2.6198255876966204e-05, + "loss": 0.0368, + "step": 3576, + "task_loss": 0.012670749798417091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809395671507181, + "compression_loss": 0.0, + "distillation_loss": 0.2581176459789276, + "epoch": 3.4, + "learning_rate": 2.6187612768940293e-05, + "loss": 0.2457, + "step": 3577, + "task_loss": 0.13422563672065735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809513595402676, + "compression_loss": 0.0, + "distillation_loss": 0.0735367089509964, + "epoch": 3.4, + "learning_rate": 2.61769694451788e-05, + "loss": 0.0734, + "step": 3578, + "task_loss": 0.07223978638648987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809631470649702, + "compression_loss": 0.0, + "distillation_loss": 0.07702729851007462, + "epoch": 3.4, + "learning_rate": 2.616632590761514e-05, + "loss": 0.078, + "step": 3579, + "task_loss": 0.08705680817365646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809749297258293, + "compression_loss": 0.0, + "distillation_loss": 0.037445612251758575, + "epoch": 3.4, + "learning_rate": 2.615568215818276e-05, + "loss": 0.0375, + "step": 3580, + "task_loss": 0.03814993426203728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809867075238486, + "compression_loss": 0.0, + "distillation_loss": 0.04116063937544823, + "epoch": 3.4, + "learning_rate": 2.6145038198815152e-05, + "loss": 0.038, + "step": 3581, + "task_loss": 0.00981508381664753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7809984804600318, + "compression_loss": 0.0, + "distillation_loss": 0.09640266001224518, + "epoch": 3.4, + "learning_rate": 2.6134394031445843e-05, + "loss": 0.1068, + "step": 3582, + "task_loss": 0.20053042471408844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7810102485353828, + "compression_loss": 0.0, + "distillation_loss": 0.14251397550106049, + "epoch": 3.4, + "learning_rate": 2.6123749658008383e-05, + "loss": 0.1474, + "step": 3583, + "task_loss": 0.1909736692905426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781022011750905, + "compression_loss": 0.0, + "distillation_loss": 0.027869436889886856, + "epoch": 3.4, + "learning_rate": 2.6113105080436396e-05, + "loss": 0.0299, + "step": 3584, + "task_loss": 0.04815257340669632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7810337701076022, + "compression_loss": 0.0, + "distillation_loss": 0.048661135137081146, + "epoch": 3.4, + "learning_rate": 2.6102460300663506e-05, + "loss": 0.0529, + "step": 3585, + "task_loss": 0.09094928950071335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781045523606478, + "compression_loss": 0.0, + "distillation_loss": 0.04565204679965973, + "epoch": 3.41, + "learning_rate": 2.60918153206234e-05, + "loss": 0.0424, + "step": 3586, + "task_loss": 0.013126576319336891 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7810572722485363, + "compression_loss": 0.0, + "distillation_loss": 0.10344560444355011, + "epoch": 3.41, + "learning_rate": 2.6081170142249773e-05, + "loss": 0.1083, + "step": 3587, + "task_loss": 0.1516730636358261 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7810690160347805, + "compression_loss": 0.0, + "distillation_loss": 0.2309635877609253, + "epoch": 3.41, + "learning_rate": 2.607052476747639e-05, + "loss": 0.222, + "step": 3588, + "task_loss": 0.14136911928653717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7810807549662145, + "compression_loss": 0.0, + "distillation_loss": 0.024008475244045258, + "epoch": 3.41, + "learning_rate": 2.6059879198237026e-05, + "loss": 0.0232, + "step": 3589, + "task_loss": 0.016412295401096344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7810924890438419, + "compression_loss": 0.0, + "distillation_loss": 0.012282849289476871, + "epoch": 3.41, + "learning_rate": 2.6049233436465498e-05, + "loss": 0.0191, + "step": 3590, + "task_loss": 0.08071555197238922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811042182686664, + "compression_loss": 0.0, + "distillation_loss": 0.01852232962846756, + "epoch": 3.41, + "learning_rate": 2.6038587484095673e-05, + "loss": 0.0184, + "step": 3591, + "task_loss": 0.01706135831773281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811159426416916, + "compression_loss": 0.0, + "distillation_loss": 0.15857195854187012, + "epoch": 3.41, + "learning_rate": 2.6027941343061412e-05, + "loss": 0.1539, + "step": 3592, + "task_loss": 0.11174768954515457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811276621639213, + "compression_loss": 0.0, + "distillation_loss": 0.12345419079065323, + "epoch": 3.41, + "learning_rate": 2.6017295015296665e-05, + "loss": 0.122, + "step": 3593, + "task_loss": 0.10872073471546173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781139376836359, + "compression_loss": 0.0, + "distillation_loss": 0.19352051615715027, + "epoch": 3.41, + "learning_rate": 2.600664850273538e-05, + "loss": 0.1846, + "step": 3594, + "task_loss": 0.10442230850458145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811510866600085, + "compression_loss": 0.0, + "distillation_loss": 0.058610156178474426, + "epoch": 3.41, + "learning_rate": 2.599600180731155e-05, + "loss": 0.0625, + "step": 3595, + "task_loss": 0.09727238863706589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811627916358737, + "compression_loss": 0.0, + "distillation_loss": 0.03841635584831238, + "epoch": 3.42, + "learning_rate": 2.598535493095919e-05, + "loss": 0.0445, + "step": 3596, + "task_loss": 0.09931030124425888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811744917649579, + "compression_loss": 0.0, + "distillation_loss": 0.032603487372398376, + "epoch": 3.42, + "learning_rate": 2.5974707875612357e-05, + "loss": 0.0327, + "step": 3597, + "task_loss": 0.0331152006983757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781186187048265, + "compression_loss": 0.0, + "distillation_loss": 0.07631140947341919, + "epoch": 3.42, + "learning_rate": 2.5964060643205153e-05, + "loss": 0.0696, + "step": 3598, + "task_loss": 0.009496444836258888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7811978774867987, + "compression_loss": 0.0, + "distillation_loss": 0.027815554291009903, + "epoch": 3.42, + "learning_rate": 2.5953413235671688e-05, + "loss": 0.036, + "step": 3599, + "task_loss": 0.10932107269763947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7812095630815625, + "compression_loss": 0.0, + "distillation_loss": 0.07741496711969376, + "epoch": 3.42, + "learning_rate": 2.594276565494611e-05, + "loss": 0.0841, + "step": 3600, + "task_loss": 0.14415237307548523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7812212438335603, + "compression_loss": 0.0, + "distillation_loss": 0.09485390782356262, + "epoch": 3.42, + "learning_rate": 2.5932117902962616e-05, + "loss": 0.0979, + "step": 3601, + "task_loss": 0.12560498714447021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7812329197437957, + "compression_loss": 0.0, + "distillation_loss": 0.17144733667373657, + "epoch": 3.42, + "learning_rate": 2.5921469981655415e-05, + "loss": 0.1648, + "step": 3602, + "task_loss": 0.10540945827960968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7812445908132722, + "compression_loss": 0.0, + "distillation_loss": 0.11685089021921158, + "epoch": 3.42, + "learning_rate": 2.591082189295876e-05, + "loss": 0.1339, + "step": 3603, + "task_loss": 0.28701621294021606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7812562570429938, + "compression_loss": 0.0, + "distillation_loss": 0.0379994660615921, + "epoch": 3.42, + "learning_rate": 2.590017363880691e-05, + "loss": 0.0415, + "step": 3604, + "task_loss": 0.07254001498222351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781267918433964, + "compression_loss": 0.0, + "distillation_loss": 0.07432805001735687, + "epoch": 3.42, + "learning_rate": 2.5889525221134192e-05, + "loss": 0.072, + "step": 3605, + "task_loss": 0.0511680468916893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7812795749871866, + "compression_loss": 0.0, + "distillation_loss": 0.11450830101966858, + "epoch": 3.42, + "learning_rate": 2.5878876641874928e-05, + "loss": 0.1202, + "step": 3606, + "task_loss": 0.1715685874223709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781291226703665, + "compression_loss": 0.0, + "distillation_loss": 0.0681910514831543, + "epoch": 3.43, + "learning_rate": 2.5868227902963493e-05, + "loss": 0.064, + "step": 3607, + "task_loss": 0.026141859591007233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813028735844032, + "compression_loss": 0.0, + "distillation_loss": 0.056682735681533813, + "epoch": 3.43, + "learning_rate": 2.5857579006334282e-05, + "loss": 0.0532, + "step": 3608, + "task_loss": 0.022059109061956406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813145156304048, + "compression_loss": 0.0, + "distillation_loss": 0.08323369175195694, + "epoch": 3.43, + "learning_rate": 2.58469299539217e-05, + "loss": 0.0813, + "step": 3609, + "task_loss": 0.06356573849916458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813261528426734, + "compression_loss": 0.0, + "distillation_loss": 0.015457273460924625, + "epoch": 3.43, + "learning_rate": 2.5836280747660225e-05, + "loss": 0.0144, + "step": 3610, + "task_loss": 0.005144596099853516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813377852222126, + "compression_loss": 0.0, + "distillation_loss": 0.05984696373343468, + "epoch": 3.43, + "learning_rate": 2.5825631389484323e-05, + "loss": 0.0799, + "step": 3611, + "task_loss": 0.2608661949634552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813494127700263, + "compression_loss": 0.0, + "distillation_loss": 0.22596868872642517, + "epoch": 3.43, + "learning_rate": 2.58149818813285e-05, + "loss": 0.2201, + "step": 3612, + "task_loss": 0.16762208938598633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813610354871181, + "compression_loss": 0.0, + "distillation_loss": 0.09914842247962952, + "epoch": 3.43, + "learning_rate": 2.5804332225127294e-05, + "loss": 0.101, + "step": 3613, + "task_loss": 0.117338627576828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813726533744917, + "compression_loss": 0.0, + "distillation_loss": 0.06026465445756912, + "epoch": 3.43, + "learning_rate": 2.579368242281527e-05, + "loss": 0.0678, + "step": 3614, + "task_loss": 0.13608182966709137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813842664331506, + "compression_loss": 0.0, + "distillation_loss": 0.16431432962417603, + "epoch": 3.43, + "learning_rate": 2.5783032476327007e-05, + "loss": 0.1575, + "step": 3615, + "task_loss": 0.09610229730606079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7813958746640988, + "compression_loss": 0.0, + "distillation_loss": 0.22635792195796967, + "epoch": 3.43, + "learning_rate": 2.5772382387597128e-05, + "loss": 0.2213, + "step": 3616, + "task_loss": 0.17567506432533264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814074780683398, + "compression_loss": 0.0, + "distillation_loss": 0.02461588941514492, + "epoch": 3.43, + "learning_rate": 2.5761732158560263e-05, + "loss": 0.023, + "step": 3617, + "task_loss": 0.008299414068460464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814190766468772, + "compression_loss": 0.0, + "distillation_loss": 0.06911341845989227, + "epoch": 3.44, + "learning_rate": 2.5751081791151083e-05, + "loss": 0.0656, + "step": 3618, + "task_loss": 0.03424249589443207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814306704007148, + "compression_loss": 0.0, + "distillation_loss": 0.21919073164463043, + "epoch": 3.44, + "learning_rate": 2.574043128730428e-05, + "loss": 0.2192, + "step": 3619, + "task_loss": 0.2192375510931015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814422593308563, + "compression_loss": 0.0, + "distillation_loss": 0.052816398441791534, + "epoch": 3.44, + "learning_rate": 2.572978064895457e-05, + "loss": 0.0481, + "step": 3620, + "task_loss": 0.006035482510924339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814538434383054, + "compression_loss": 0.0, + "distillation_loss": 0.19541379809379578, + "epoch": 3.44, + "learning_rate": 2.5719129878036686e-05, + "loss": 0.1998, + "step": 3621, + "task_loss": 0.2397608757019043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814654227240656, + "compression_loss": 0.0, + "distillation_loss": 0.05258458852767944, + "epoch": 3.44, + "learning_rate": 2.5708478976485402e-05, + "loss": 0.0484, + "step": 3622, + "task_loss": 0.010881522670388222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814769971891408, + "compression_loss": 0.0, + "distillation_loss": 0.07996995747089386, + "epoch": 3.44, + "learning_rate": 2.569782794623549e-05, + "loss": 0.0778, + "step": 3623, + "task_loss": 0.058584682643413544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7814885668345345, + "compression_loss": 0.0, + "distillation_loss": 0.26377159357070923, + "epoch": 3.44, + "learning_rate": 2.5687176789221784e-05, + "loss": 0.2652, + "step": 3624, + "task_loss": 0.2783206105232239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815001316612505, + "compression_loss": 0.0, + "distillation_loss": 0.1098802238702774, + "epoch": 3.44, + "learning_rate": 2.5676525507379097e-05, + "loss": 0.1033, + "step": 3625, + "task_loss": 0.04377390444278717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815116916702924, + "compression_loss": 0.0, + "distillation_loss": 0.19648560881614685, + "epoch": 3.44, + "learning_rate": 2.566587410264229e-05, + "loss": 0.1934, + "step": 3626, + "task_loss": 0.16522561013698578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815232468626641, + "compression_loss": 0.0, + "distillation_loss": 0.09389695525169373, + "epoch": 3.44, + "learning_rate": 2.565522257694625e-05, + "loss": 0.0902, + "step": 3627, + "task_loss": 0.05693268030881882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781534797239369, + "compression_loss": 0.0, + "distillation_loss": 0.053470179438591, + "epoch": 3.45, + "learning_rate": 2.5644570932225874e-05, + "loss": 0.0505, + "step": 3628, + "task_loss": 0.023593464866280556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.781546342801411, + "compression_loss": 0.0, + "distillation_loss": 0.13491879403591156, + "epoch": 3.45, + "learning_rate": 2.5633919170416087e-05, + "loss": 0.1316, + "step": 3629, + "task_loss": 0.10193748027086258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815578835497936, + "compression_loss": 0.0, + "distillation_loss": 0.0393562838435173, + "epoch": 3.45, + "learning_rate": 2.5623267293451826e-05, + "loss": 0.0364, + "step": 3630, + "task_loss": 0.009963281452655792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815694194855205, + "compression_loss": 0.0, + "distillation_loss": 0.09000158309936523, + "epoch": 3.45, + "learning_rate": 2.5612615303268062e-05, + "loss": 0.0853, + "step": 3631, + "task_loss": 0.04277219995856285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815809506095955, + "compression_loss": 0.0, + "distillation_loss": 0.04296325892210007, + "epoch": 3.45, + "learning_rate": 2.560196320179977e-05, + "loss": 0.0837, + "step": 3632, + "task_loss": 0.4504657983779907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7815924769230224, + "compression_loss": 0.0, + "distillation_loss": 0.05088837444782257, + "epoch": 3.45, + "learning_rate": 2.559131099098197e-05, + "loss": 0.0472, + "step": 3633, + "task_loss": 0.013780592009425163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816039984268045, + "compression_loss": 0.0, + "distillation_loss": 0.04055938124656677, + "epoch": 3.45, + "learning_rate": 2.558065867274968e-05, + "loss": 0.0381, + "step": 3634, + "task_loss": 0.015710243955254555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816155151219457, + "compression_loss": 0.0, + "distillation_loss": 0.21613720059394836, + "epoch": 3.45, + "learning_rate": 2.5570006249037943e-05, + "loss": 0.2207, + "step": 3635, + "task_loss": 0.261584997177124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816270270094497, + "compression_loss": 0.0, + "distillation_loss": 0.17852503061294556, + "epoch": 3.45, + "learning_rate": 2.5559353721781832e-05, + "loss": 0.1745, + "step": 3636, + "task_loss": 0.13826580345630646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816385340903202, + "compression_loss": 0.0, + "distillation_loss": 0.019594872370362282, + "epoch": 3.45, + "learning_rate": 2.5548701092916415e-05, + "loss": 0.0184, + "step": 3637, + "task_loss": 0.00783473439514637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816500363655609, + "compression_loss": 0.0, + "distillation_loss": 0.028669090941548347, + "epoch": 3.45, + "learning_rate": 2.5538048364376806e-05, + "loss": 0.0316, + "step": 3638, + "task_loss": 0.05839303508400917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816615338361753, + "compression_loss": 0.0, + "distillation_loss": 0.039027225226163864, + "epoch": 3.46, + "learning_rate": 2.552739553809812e-05, + "loss": 0.0409, + "step": 3639, + "task_loss": 0.05817575752735138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816730265031673, + "compression_loss": 0.0, + "distillation_loss": 0.05956938490271568, + "epoch": 3.46, + "learning_rate": 2.5516742616015493e-05, + "loss": 0.0601, + "step": 3640, + "task_loss": 0.06536316871643066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816845143675404, + "compression_loss": 0.0, + "distillation_loss": 0.19583433866500854, + "epoch": 3.46, + "learning_rate": 2.5506089600064086e-05, + "loss": 0.2043, + "step": 3641, + "task_loss": 0.2809743881225586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7816959974302984, + "compression_loss": 0.0, + "distillation_loss": 0.10645139217376709, + "epoch": 3.46, + "learning_rate": 2.549543649217906e-05, + "loss": 0.1073, + "step": 3642, + "task_loss": 0.11531829088926315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817074756924449, + "compression_loss": 0.0, + "distillation_loss": 0.13260212540626526, + "epoch": 3.46, + "learning_rate": 2.548478329429561e-05, + "loss": 0.1288, + "step": 3643, + "task_loss": 0.09502564370632172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817189491549836, + "compression_loss": 0.0, + "distillation_loss": 0.03851540386676788, + "epoch": 3.46, + "learning_rate": 2.5474130008348946e-05, + "loss": 0.0381, + "step": 3644, + "task_loss": 0.034055422991514206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817304178189184, + "compression_loss": 0.0, + "distillation_loss": 0.03768332302570343, + "epoch": 3.46, + "learning_rate": 2.5463476636274276e-05, + "loss": 0.0514, + "step": 3645, + "task_loss": 0.17461322247982025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817418816852526, + "compression_loss": 0.0, + "distillation_loss": 0.19072747230529785, + "epoch": 3.46, + "learning_rate": 2.5452823180006845e-05, + "loss": 0.1848, + "step": 3646, + "task_loss": 0.13176460564136505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817533407549903, + "compression_loss": 0.0, + "distillation_loss": 0.06055070832371712, + "epoch": 3.46, + "learning_rate": 2.5442169641481907e-05, + "loss": 0.0671, + "step": 3647, + "task_loss": 0.1260763555765152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817647950291348, + "compression_loss": 0.0, + "distillation_loss": 0.03400256484746933, + "epoch": 3.46, + "learning_rate": 2.5431516022634715e-05, + "loss": 0.0456, + "step": 3648, + "task_loss": 0.1496407389640808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817762445086899, + "compression_loss": 0.0, + "distillation_loss": 0.021507391706109047, + "epoch": 3.47, + "learning_rate": 2.5420862325400563e-05, + "loss": 0.02, + "step": 3649, + "task_loss": 0.006038764491677284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817876891946594, + "compression_loss": 0.0, + "distillation_loss": 0.02982695959508419, + "epoch": 3.47, + "learning_rate": 2.5410208551714742e-05, + "loss": 0.0378, + "step": 3650, + "task_loss": 0.10967092961072922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7817991290880468, + "compression_loss": 0.0, + "distillation_loss": 0.08754716068506241, + "epoch": 3.47, + "learning_rate": 2.539955470351257e-05, + "loss": 0.0896, + "step": 3651, + "task_loss": 0.1080557107925415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818105641898561, + "compression_loss": 0.0, + "distillation_loss": 0.03534093499183655, + "epoch": 3.47, + "learning_rate": 2.5388900782729347e-05, + "loss": 0.0326, + "step": 3652, + "task_loss": 0.007803870365023613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818219945010906, + "compression_loss": 0.0, + "distillation_loss": 0.07137121260166168, + "epoch": 3.47, + "learning_rate": 2.5378246791300435e-05, + "loss": 0.077, + "step": 3653, + "task_loss": 0.12779828906059265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818334200227542, + "compression_loss": 0.0, + "distillation_loss": 0.2230396568775177, + "epoch": 3.47, + "learning_rate": 2.536759273116117e-05, + "loss": 0.2194, + "step": 3654, + "task_loss": 0.18703873455524445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818448407558506, + "compression_loss": 0.0, + "distillation_loss": 0.13464270532131195, + "epoch": 3.47, + "learning_rate": 2.5356938604246916e-05, + "loss": 0.1454, + "step": 3655, + "task_loss": 0.24241343140602112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818562567013833, + "compression_loss": 0.0, + "distillation_loss": 0.03593993932008743, + "epoch": 3.47, + "learning_rate": 2.534628441249305e-05, + "loss": 0.0394, + "step": 3656, + "task_loss": 0.07036207616329193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818676678603561, + "compression_loss": 0.0, + "distillation_loss": 0.13949531316757202, + "epoch": 3.47, + "learning_rate": 2.5335630157834937e-05, + "loss": 0.1425, + "step": 3657, + "task_loss": 0.16993454098701477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818790742337728, + "compression_loss": 0.0, + "distillation_loss": 0.08820469677448273, + "epoch": 3.47, + "learning_rate": 2.5324975842208004e-05, + "loss": 0.0803, + "step": 3658, + "task_loss": 0.008724292740225792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7818904758226368, + "compression_loss": 0.0, + "distillation_loss": 0.12831662595272064, + "epoch": 3.47, + "learning_rate": 2.5314321467547635e-05, + "loss": 0.1308, + "step": 3659, + "task_loss": 0.15305723249912262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819018726279521, + "compression_loss": 0.0, + "distillation_loss": 0.02787201665341854, + "epoch": 3.48, + "learning_rate": 2.5303667035789262e-05, + "loss": 0.0258, + "step": 3660, + "task_loss": 0.007001947611570358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819132646507222, + "compression_loss": 0.0, + "distillation_loss": 0.02681071124970913, + "epoch": 3.48, + "learning_rate": 2.5293012548868306e-05, + "loss": 0.0394, + "step": 3661, + "task_loss": 0.15266357362270355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819246518919507, + "compression_loss": 0.0, + "distillation_loss": 0.07564039528369904, + "epoch": 3.48, + "learning_rate": 2.5282358008720213e-05, + "loss": 0.0799, + "step": 3662, + "task_loss": 0.11861392855644226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819360343526415, + "compression_loss": 0.0, + "distillation_loss": 0.04754069447517395, + "epoch": 3.48, + "learning_rate": 2.5271703417280433e-05, + "loss": 0.0612, + "step": 3663, + "task_loss": 0.1845139116048813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819474120337981, + "compression_loss": 0.0, + "distillation_loss": 0.01500766258686781, + "epoch": 3.48, + "learning_rate": 2.526104877648441e-05, + "loss": 0.0139, + "step": 3664, + "task_loss": 0.00399116612970829 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819587849364243, + "compression_loss": 0.0, + "distillation_loss": 0.06251572072505951, + "epoch": 3.48, + "learning_rate": 2.525039408826762e-05, + "loss": 0.065, + "step": 3665, + "task_loss": 0.08762296289205551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819701530615237, + "compression_loss": 0.0, + "distillation_loss": 0.1165790855884552, + "epoch": 3.48, + "learning_rate": 2.523973935456554e-05, + "loss": 0.1153, + "step": 3666, + "task_loss": 0.10344909876585007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819815164101, + "compression_loss": 0.0, + "distillation_loss": 0.09375932067632675, + "epoch": 3.48, + "learning_rate": 2.522908457731366e-05, + "loss": 0.0982, + "step": 3667, + "task_loss": 0.138652503490448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7819928749831571, + "compression_loss": 0.0, + "distillation_loss": 0.09869857132434845, + "epoch": 3.48, + "learning_rate": 2.5218429758447455e-05, + "loss": 0.0972, + "step": 3668, + "task_loss": 0.08332294970750809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820042287816983, + "compression_loss": 0.0, + "distillation_loss": 0.07920647412538528, + "epoch": 3.48, + "learning_rate": 2.520777489990243e-05, + "loss": 0.0822, + "step": 3669, + "task_loss": 0.10958139598369598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820155778067275, + "compression_loss": 0.0, + "distillation_loss": 0.04178139567375183, + "epoch": 3.49, + "learning_rate": 2.5197120003614094e-05, + "loss": 0.0431, + "step": 3670, + "task_loss": 0.054654479026794434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820269220592484, + "compression_loss": 0.0, + "distillation_loss": 0.08496996760368347, + "epoch": 3.49, + "learning_rate": 2.518646507151796e-05, + "loss": 0.0805, + "step": 3671, + "task_loss": 0.04056711122393608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820382615402646, + "compression_loss": 0.0, + "distillation_loss": 0.07371478527784348, + "epoch": 3.49, + "learning_rate": 2.517581010554956e-05, + "loss": 0.0778, + "step": 3672, + "task_loss": 0.11423461884260178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820495962507797, + "compression_loss": 0.0, + "distillation_loss": 0.04122939705848694, + "epoch": 3.49, + "learning_rate": 2.5165155107644394e-05, + "loss": 0.0386, + "step": 3673, + "task_loss": 0.014970516785979271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820609261917976, + "compression_loss": 0.0, + "distillation_loss": 0.18131166696548462, + "epoch": 3.49, + "learning_rate": 2.515450007973801e-05, + "loss": 0.1739, + "step": 3674, + "task_loss": 0.10724371671676636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820722513643219, + "compression_loss": 0.0, + "distillation_loss": 0.11633885651826859, + "epoch": 3.49, + "learning_rate": 2.5143845023765943e-05, + "loss": 0.1146, + "step": 3675, + "task_loss": 0.09903288632631302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820835717693562, + "compression_loss": 0.0, + "distillation_loss": 0.15314285457134247, + "epoch": 3.49, + "learning_rate": 2.513318994166373e-05, + "loss": 0.1471, + "step": 3676, + "task_loss": 0.09299644827842712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7820948874079042, + "compression_loss": 0.0, + "distillation_loss": 0.05131184682250023, + "epoch": 3.49, + "learning_rate": 2.5122534835366934e-05, + "loss": 0.0473, + "step": 3677, + "task_loss": 0.011473558843135834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821061982809697, + "compression_loss": 0.0, + "distillation_loss": 0.23348447680473328, + "epoch": 3.49, + "learning_rate": 2.5111879706811087e-05, + "loss": 0.226, + "step": 3678, + "task_loss": 0.15873844921588898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821175043895563, + "compression_loss": 0.0, + "distillation_loss": 0.050928764045238495, + "epoch": 3.49, + "learning_rate": 2.5101224557931758e-05, + "loss": 0.0542, + "step": 3679, + "task_loss": 0.08366743475198746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821288057346677, + "compression_loss": 0.0, + "distillation_loss": 0.1100388914346695, + "epoch": 3.49, + "learning_rate": 2.5090569390664492e-05, + "loss": 0.1145, + "step": 3680, + "task_loss": 0.15434427559375763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821401023173075, + "compression_loss": 0.0, + "distillation_loss": 0.13357719779014587, + "epoch": 3.5, + "learning_rate": 2.5079914206944866e-05, + "loss": 0.1348, + "step": 3681, + "task_loss": 0.14616169035434723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821513941384794, + "compression_loss": 0.0, + "distillation_loss": 0.10456112027168274, + "epoch": 3.5, + "learning_rate": 2.5069259008708446e-05, + "loss": 0.1072, + "step": 3682, + "task_loss": 0.1304989755153656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821626811991873, + "compression_loss": 0.0, + "distillation_loss": 0.028667902573943138, + "epoch": 3.5, + "learning_rate": 2.5058603797890778e-05, + "loss": 0.0353, + "step": 3683, + "task_loss": 0.094968281686306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821739635004347, + "compression_loss": 0.0, + "distillation_loss": 0.11443310976028442, + "epoch": 3.5, + "learning_rate": 2.504794857642746e-05, + "loss": 0.1115, + "step": 3684, + "task_loss": 0.08492926508188248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821852410432252, + "compression_loss": 0.0, + "distillation_loss": 0.04449796676635742, + "epoch": 3.5, + "learning_rate": 2.5037293346254044e-05, + "loss": 0.0453, + "step": 3685, + "task_loss": 0.05215233191847801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7821965138285626, + "compression_loss": 0.0, + "distillation_loss": 0.08054036647081375, + "epoch": 3.5, + "learning_rate": 2.5026638109306118e-05, + "loss": 0.0923, + "step": 3686, + "task_loss": 0.1976814568042755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822077818574505, + "compression_loss": 0.0, + "distillation_loss": 0.03564410284161568, + "epoch": 3.5, + "learning_rate": 2.5015982867519245e-05, + "loss": 0.0327, + "step": 3687, + "task_loss": 0.006579475477337837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822190451308928, + "compression_loss": 0.0, + "distillation_loss": 0.032507769763469696, + "epoch": 3.5, + "learning_rate": 2.500532762282901e-05, + "loss": 0.0497, + "step": 3688, + "task_loss": 0.20428074896335602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822303036498929, + "compression_loss": 0.0, + "distillation_loss": 0.03502486273646355, + "epoch": 3.5, + "learning_rate": 2.4994672377170988e-05, + "loss": 0.0328, + "step": 3689, + "task_loss": 0.013175180181860924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822415574154546, + "compression_loss": 0.0, + "distillation_loss": 0.05492643266916275, + "epoch": 3.5, + "learning_rate": 2.4984017132480754e-05, + "loss": 0.0587, + "step": 3690, + "task_loss": 0.0923444926738739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822528064285817, + "compression_loss": 0.0, + "distillation_loss": 0.022486487403512, + "epoch": 3.51, + "learning_rate": 2.4973361890693888e-05, + "loss": 0.021, + "step": 3691, + "task_loss": 0.0075523629784584045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822640506902777, + "compression_loss": 0.0, + "distillation_loss": 0.11572916805744171, + "epoch": 3.51, + "learning_rate": 2.496270665374596e-05, + "loss": 0.1192, + "step": 3692, + "task_loss": 0.15077991783618927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822752902015464, + "compression_loss": 0.0, + "distillation_loss": 0.08084140717983246, + "epoch": 3.51, + "learning_rate": 2.4952051423572548e-05, + "loss": 0.0787, + "step": 3693, + "task_loss": 0.058929864317178726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822865249633913, + "compression_loss": 0.0, + "distillation_loss": 0.06542964279651642, + "epoch": 3.51, + "learning_rate": 2.494139620210923e-05, + "loss": 0.0636, + "step": 3694, + "task_loss": 0.04673419147729874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7822977549768164, + "compression_loss": 0.0, + "distillation_loss": 0.0398731529712677, + "epoch": 3.51, + "learning_rate": 2.4930740991291567e-05, + "loss": 0.0375, + "step": 3695, + "task_loss": 0.016480809077620506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823089802428251, + "compression_loss": 0.0, + "distillation_loss": 0.02609540894627571, + "epoch": 3.51, + "learning_rate": 2.492008579305514e-05, + "loss": 0.032, + "step": 3696, + "task_loss": 0.08469408750534058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823202007624213, + "compression_loss": 0.0, + "distillation_loss": 0.07933405041694641, + "epoch": 3.51, + "learning_rate": 2.4909430609335517e-05, + "loss": 0.0922, + "step": 3697, + "task_loss": 0.2080336958169937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823314165366084, + "compression_loss": 0.0, + "distillation_loss": 0.08961853384971619, + "epoch": 3.51, + "learning_rate": 2.4898775442068248e-05, + "loss": 0.0827, + "step": 3698, + "task_loss": 0.020808879286050797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823426275663903, + "compression_loss": 0.0, + "distillation_loss": 0.028536148369312286, + "epoch": 3.51, + "learning_rate": 2.4888120293188916e-05, + "loss": 0.0301, + "step": 3699, + "task_loss": 0.04397343844175339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823538338527707, + "compression_loss": 0.0, + "distillation_loss": 0.030012015253305435, + "epoch": 3.51, + "learning_rate": 2.487746516463307e-05, + "loss": 0.0367, + "step": 3700, + "task_loss": 0.096799835562706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823650353967531, + "compression_loss": 0.0, + "distillation_loss": 0.03244200348854065, + "epoch": 3.51, + "learning_rate": 2.486681005833627e-05, + "loss": 0.0298, + "step": 3701, + "task_loss": 0.005791664123535156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823762321993413, + "compression_loss": 0.0, + "distillation_loss": 0.19793766736984253, + "epoch": 3.52, + "learning_rate": 2.4856154976234063e-05, + "loss": 0.1875, + "step": 3702, + "task_loss": 0.09377109259366989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7823874242615391, + "compression_loss": 0.0, + "distillation_loss": 0.15221963822841644, + "epoch": 3.52, + "learning_rate": 2.4845499920261993e-05, + "loss": 0.1428, + "step": 3703, + "task_loss": 0.05772307515144348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78239861158435, + "compression_loss": 0.0, + "distillation_loss": 0.031839657574892044, + "epoch": 3.52, + "learning_rate": 2.4834844892355615e-05, + "loss": 0.0292, + "step": 3704, + "task_loss": 0.005376823246479034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824097941687777, + "compression_loss": 0.0, + "distillation_loss": 0.03910262882709503, + "epoch": 3.52, + "learning_rate": 2.4824189894450453e-05, + "loss": 0.0486, + "step": 3705, + "task_loss": 0.1344204545021057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824209720158258, + "compression_loss": 0.0, + "distillation_loss": 0.14387640357017517, + "epoch": 3.52, + "learning_rate": 2.4813534928482038e-05, + "loss": 0.1372, + "step": 3706, + "task_loss": 0.07675487548112869 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824321451264983, + "compression_loss": 0.0, + "distillation_loss": 0.05041220039129257, + "epoch": 3.52, + "learning_rate": 2.4802879996385905e-05, + "loss": 0.0566, + "step": 3707, + "task_loss": 0.11191262304782867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824433135017986, + "compression_loss": 0.0, + "distillation_loss": 0.1696065366268158, + "epoch": 3.52, + "learning_rate": 2.4792225100097578e-05, + "loss": 0.161, + "step": 3708, + "task_loss": 0.08338792622089386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824544771427305, + "compression_loss": 0.0, + "distillation_loss": 0.035738278180360794, + "epoch": 3.52, + "learning_rate": 2.4781570241552554e-05, + "loss": 0.0337, + "step": 3709, + "task_loss": 0.015493502840399742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824656360502976, + "compression_loss": 0.0, + "distillation_loss": 0.18329283595085144, + "epoch": 3.52, + "learning_rate": 2.477091542268635e-05, + "loss": 0.184, + "step": 3710, + "task_loss": 0.18989968299865723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824767902255038, + "compression_loss": 0.0, + "distillation_loss": 0.016138188540935516, + "epoch": 3.52, + "learning_rate": 2.4760260645434462e-05, + "loss": 0.0148, + "step": 3711, + "task_loss": 0.0031305551528930664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824879396693524, + "compression_loss": 0.0, + "distillation_loss": 0.18617260456085205, + "epoch": 3.53, + "learning_rate": 2.474960591173238e-05, + "loss": 0.1785, + "step": 3712, + "task_loss": 0.10942824184894562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7824990843828474, + "compression_loss": 0.0, + "distillation_loss": 0.0715465247631073, + "epoch": 3.53, + "learning_rate": 2.47389512235156e-05, + "loss": 0.0795, + "step": 3713, + "task_loss": 0.15127640962600708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825102243669924, + "compression_loss": 0.0, + "distillation_loss": 0.04901812970638275, + "epoch": 3.53, + "learning_rate": 2.472829658271958e-05, + "loss": 0.0451, + "step": 3714, + "task_loss": 0.01026928424835205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825213596227909, + "compression_loss": 0.0, + "distillation_loss": 0.03361319750547409, + "epoch": 3.53, + "learning_rate": 2.4717641991279786e-05, + "loss": 0.031, + "step": 3715, + "task_loss": 0.007931182160973549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825324901512469, + "compression_loss": 0.0, + "distillation_loss": 0.01693807728588581, + "epoch": 3.53, + "learning_rate": 2.4706987451131693e-05, + "loss": 0.0202, + "step": 3716, + "task_loss": 0.04959682375192642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825436159533639, + "compression_loss": 0.0, + "distillation_loss": 0.04695942997932434, + "epoch": 3.53, + "learning_rate": 2.4696332964210743e-05, + "loss": 0.0442, + "step": 3717, + "task_loss": 0.01931283064186573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825547370301456, + "compression_loss": 0.0, + "distillation_loss": 0.04885145276784897, + "epoch": 3.53, + "learning_rate": 2.468567853245237e-05, + "loss": 0.0489, + "step": 3718, + "task_loss": 0.049653246998786926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825658533825957, + "compression_loss": 0.0, + "distillation_loss": 0.03596015274524689, + "epoch": 3.53, + "learning_rate": 2.4675024157792005e-05, + "loss": 0.0335, + "step": 3719, + "task_loss": 0.011791346594691277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825769650117178, + "compression_loss": 0.0, + "distillation_loss": 0.08147747069597244, + "epoch": 3.53, + "learning_rate": 2.4664369842165068e-05, + "loss": 0.0833, + "step": 3720, + "task_loss": 0.0997631698846817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7825880719185156, + "compression_loss": 0.0, + "distillation_loss": 0.0436641164124012, + "epoch": 3.53, + "learning_rate": 2.4653715587506963e-05, + "loss": 0.0399, + "step": 3721, + "task_loss": 0.006380394101142883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.782599174103993, + "compression_loss": 0.0, + "distillation_loss": 0.0826558843255043, + "epoch": 3.53, + "learning_rate": 2.4643061395753093e-05, + "loss": 0.0923, + "step": 3722, + "task_loss": 0.178600013256073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826102715691534, + "compression_loss": 0.0, + "distillation_loss": 0.07361429929733276, + "epoch": 3.54, + "learning_rate": 2.463240726883884e-05, + "loss": 0.0684, + "step": 3723, + "task_loss": 0.021939026191830635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826213643150007, + "compression_loss": 0.0, + "distillation_loss": 0.08207493275403976, + "epoch": 3.54, + "learning_rate": 2.4621753208699567e-05, + "loss": 0.0901, + "step": 3724, + "task_loss": 0.16210611164569855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826324523425384, + "compression_loss": 0.0, + "distillation_loss": 0.058608826249837875, + "epoch": 3.54, + "learning_rate": 2.4611099217270652e-05, + "loss": 0.0535, + "step": 3725, + "task_loss": 0.007720122113823891 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826435356527703, + "compression_loss": 0.0, + "distillation_loss": 0.03315757215023041, + "epoch": 3.54, + "learning_rate": 2.4600445296487436e-05, + "loss": 0.0306, + "step": 3726, + "task_loss": 0.007330842316150665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826546142467001, + "compression_loss": 0.0, + "distillation_loss": 0.12233985215425491, + "epoch": 3.54, + "learning_rate": 2.4589791448285264e-05, + "loss": 0.1266, + "step": 3727, + "task_loss": 0.16475452482700348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826656881253313, + "compression_loss": 0.0, + "distillation_loss": 0.05593999847769737, + "epoch": 3.54, + "learning_rate": 2.4579137674599443e-05, + "loss": 0.0616, + "step": 3728, + "task_loss": 0.11271567642688751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826767572896678, + "compression_loss": 0.0, + "distillation_loss": 0.12087702751159668, + "epoch": 3.54, + "learning_rate": 2.456848397736529e-05, + "loss": 0.1295, + "step": 3729, + "task_loss": 0.20751793682575226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826878217407132, + "compression_loss": 0.0, + "distillation_loss": 0.060698773711919785, + "epoch": 3.54, + "learning_rate": 2.455783035851811e-05, + "loss": 0.0597, + "step": 3730, + "task_loss": 0.05119254067540169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7826988814794711, + "compression_loss": 0.0, + "distillation_loss": 0.07374706864356995, + "epoch": 3.54, + "learning_rate": 2.4547176819993164e-05, + "loss": 0.082, + "step": 3731, + "task_loss": 0.15632230043411255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827099365069453, + "compression_loss": 0.0, + "distillation_loss": 0.07239855825901031, + "epoch": 3.54, + "learning_rate": 2.4536523363725727e-05, + "loss": 0.071, + "step": 3732, + "task_loss": 0.058301545679569244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827209868241394, + "compression_loss": 0.0, + "distillation_loss": 0.02140430361032486, + "epoch": 3.55, + "learning_rate": 2.4525869991651057e-05, + "loss": 0.02, + "step": 3733, + "task_loss": 0.006873439997434616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827320324320571, + "compression_loss": 0.0, + "distillation_loss": 0.020102519541978836, + "epoch": 3.55, + "learning_rate": 2.4515216705704395e-05, + "loss": 0.0195, + "step": 3734, + "task_loss": 0.014333127066493034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827430733317022, + "compression_loss": 0.0, + "distillation_loss": 0.055659808218479156, + "epoch": 3.55, + "learning_rate": 2.4504563507820942e-05, + "loss": 0.0636, + "step": 3735, + "task_loss": 0.1351262629032135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827541095240782, + "compression_loss": 0.0, + "distillation_loss": 0.04852107912302017, + "epoch": 3.55, + "learning_rate": 2.449391039993592e-05, + "loss": 0.0525, + "step": 3736, + "task_loss": 0.08783454447984695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827651410101889, + "compression_loss": 0.0, + "distillation_loss": 0.029902100563049316, + "epoch": 3.55, + "learning_rate": 2.4483257383984513e-05, + "loss": 0.0296, + "step": 3737, + "task_loss": 0.026881104335188866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.782776167791038, + "compression_loss": 0.0, + "distillation_loss": 0.11988551914691925, + "epoch": 3.55, + "learning_rate": 2.4472604461901887e-05, + "loss": 0.1168, + "step": 3738, + "task_loss": 0.08916487544775009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827871898676291, + "compression_loss": 0.0, + "distillation_loss": 0.09784644842147827, + "epoch": 3.55, + "learning_rate": 2.44619516356232e-05, + "loss": 0.0946, + "step": 3739, + "task_loss": 0.06569681316614151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7827982072409658, + "compression_loss": 0.0, + "distillation_loss": 0.06321494281291962, + "epoch": 3.55, + "learning_rate": 2.4451298907083597e-05, + "loss": 0.0693, + "step": 3740, + "task_loss": 0.1242145225405693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828092199120521, + "compression_loss": 0.0, + "distillation_loss": 0.028960630297660828, + "epoch": 3.55, + "learning_rate": 2.4440646278218177e-05, + "loss": 0.0275, + "step": 3741, + "task_loss": 0.013882100582122803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828202278818913, + "compression_loss": 0.0, + "distillation_loss": 0.04928376153111458, + "epoch": 3.55, + "learning_rate": 2.442999375096206e-05, + "loss": 0.0484, + "step": 3742, + "task_loss": 0.040039315819740295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828312311514873, + "compression_loss": 0.0, + "distillation_loss": 0.08106836676597595, + "epoch": 3.55, + "learning_rate": 2.4419341327250323e-05, + "loss": 0.0827, + "step": 3743, + "task_loss": 0.09725627303123474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828422297218438, + "compression_loss": 0.0, + "distillation_loss": 0.26687347888946533, + "epoch": 3.56, + "learning_rate": 2.4408689009018037e-05, + "loss": 0.2664, + "step": 3744, + "task_loss": 0.26216715574264526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828532235939644, + "compression_loss": 0.0, + "distillation_loss": 0.061114098876714706, + "epoch": 3.56, + "learning_rate": 2.4398036798200235e-05, + "loss": 0.0888, + "step": 3745, + "task_loss": 0.3378845453262329 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828642127688528, + "compression_loss": 0.0, + "distillation_loss": 0.10098116099834442, + "epoch": 3.56, + "learning_rate": 2.4387384696731947e-05, + "loss": 0.1014, + "step": 3746, + "task_loss": 0.10469695925712585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828751972475126, + "compression_loss": 0.0, + "distillation_loss": 0.17749673128128052, + "epoch": 3.56, + "learning_rate": 2.4376732706548183e-05, + "loss": 0.187, + "step": 3747, + "task_loss": 0.2721644937992096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828861770309476, + "compression_loss": 0.0, + "distillation_loss": 0.0985332578420639, + "epoch": 3.56, + "learning_rate": 2.4366080829583922e-05, + "loss": 0.1061, + "step": 3748, + "task_loss": 0.17437314987182617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7828971521201615, + "compression_loss": 0.0, + "distillation_loss": 0.06623338907957077, + "epoch": 3.56, + "learning_rate": 2.4355429067774135e-05, + "loss": 0.0673, + "step": 3749, + "task_loss": 0.07646562159061432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829081225161579, + "compression_loss": 0.0, + "distillation_loss": 0.05294762924313545, + "epoch": 3.56, + "learning_rate": 2.4344777423053748e-05, + "loss": 0.0511, + "step": 3750, + "task_loss": 0.03410526365041733 + }, + { + "epoch": 3.56, + "eval_accuracy": 0.8853211009174312, + "eval_loss": 0.439721018075943, + "eval_runtime": 18.0919, + "eval_samples_per_second": 48.198, + "eval_steps_per_second": 6.025, + "step": 3750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829190882199406, + "compression_loss": 0.0, + "distillation_loss": 0.035928741097450256, + "epoch": 3.56, + "learning_rate": 2.433412589735771e-05, + "loss": 0.033, + "step": 3751, + "task_loss": 0.0063820406794548035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829300492325132, + "compression_loss": 0.0, + "distillation_loss": 0.04816991835832596, + "epoch": 3.56, + "learning_rate": 2.4323474492620905e-05, + "loss": 0.0522, + "step": 3752, + "task_loss": 0.08798433095216751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829410055548792, + "compression_loss": 0.0, + "distillation_loss": 0.05488145351409912, + "epoch": 3.56, + "learning_rate": 2.431282321077822e-05, + "loss": 0.0528, + "step": 3753, + "task_loss": 0.033642224967479706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829519571880427, + "compression_loss": 0.0, + "distillation_loss": 0.04462762922048569, + "epoch": 3.57, + "learning_rate": 2.4302172053764514e-05, + "loss": 0.0415, + "step": 3754, + "task_loss": 0.012937184423208237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.782962904133007, + "compression_loss": 0.0, + "distillation_loss": 0.13158389925956726, + "epoch": 3.57, + "learning_rate": 2.4291521023514604e-05, + "loss": 0.1315, + "step": 3755, + "task_loss": 0.13095977902412415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829738463907759, + "compression_loss": 0.0, + "distillation_loss": 0.12265504896640778, + "epoch": 3.57, + "learning_rate": 2.4280870121963323e-05, + "loss": 0.118, + "step": 3756, + "task_loss": 0.0760970339179039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829847839623532, + "compression_loss": 0.0, + "distillation_loss": 0.03240625560283661, + "epoch": 3.57, + "learning_rate": 2.4270219351045438e-05, + "loss": 0.0415, + "step": 3757, + "task_loss": 0.12327875196933746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7829957168487424, + "compression_loss": 0.0, + "distillation_loss": 0.033418092876672745, + "epoch": 3.57, + "learning_rate": 2.425956871269572e-05, + "loss": 0.0388, + "step": 3758, + "task_loss": 0.08721967041492462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830066450509473, + "compression_loss": 0.0, + "distillation_loss": 0.038820862770080566, + "epoch": 3.57, + "learning_rate": 2.4248918208848916e-05, + "loss": 0.0417, + "step": 3759, + "task_loss": 0.06801855564117432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830175685699716, + "compression_loss": 0.0, + "distillation_loss": 0.06290020793676376, + "epoch": 3.57, + "learning_rate": 2.423826784143974e-05, + "loss": 0.0586, + "step": 3760, + "task_loss": 0.019695976749062538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830284874068189, + "compression_loss": 0.0, + "distillation_loss": 0.04252585023641586, + "epoch": 3.57, + "learning_rate": 2.422761761240288e-05, + "loss": 0.0478, + "step": 3761, + "task_loss": 0.09514269232749939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783039401562493, + "compression_loss": 0.0, + "distillation_loss": 0.22789345681667328, + "epoch": 3.57, + "learning_rate": 2.4216967523673e-05, + "loss": 0.22, + "step": 3762, + "task_loss": 0.14860832691192627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830503110379974, + "compression_loss": 0.0, + "distillation_loss": 0.13879983127117157, + "epoch": 3.57, + "learning_rate": 2.420631757718474e-05, + "loss": 0.1326, + "step": 3763, + "task_loss": 0.07688990235328674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830612158343359, + "compression_loss": 0.0, + "distillation_loss": 0.14175349473953247, + "epoch": 3.57, + "learning_rate": 2.419566777487271e-05, + "loss": 0.138, + "step": 3764, + "task_loss": 0.1046019047498703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830721159525121, + "compression_loss": 0.0, + "distillation_loss": 0.02354617603123188, + "epoch": 3.58, + "learning_rate": 2.4185018118671504e-05, + "loss": 0.0316, + "step": 3765, + "task_loss": 0.1045549288392067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830830113935299, + "compression_loss": 0.0, + "distillation_loss": 0.23610442876815796, + "epoch": 3.58, + "learning_rate": 2.417436861051569e-05, + "loss": 0.2247, + "step": 3766, + "task_loss": 0.12234364449977875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7830939021583927, + "compression_loss": 0.0, + "distillation_loss": 0.08728310465812683, + "epoch": 3.58, + "learning_rate": 2.4163719252339774e-05, + "loss": 0.0861, + "step": 3767, + "task_loss": 0.0753721296787262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831047882481044, + "compression_loss": 0.0, + "distillation_loss": 0.04650529474020004, + "epoch": 3.58, + "learning_rate": 2.41530700460783e-05, + "loss": 0.0705, + "step": 3768, + "task_loss": 0.2862272262573242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831156696636685, + "compression_loss": 0.0, + "distillation_loss": 0.03932388499379158, + "epoch": 3.58, + "learning_rate": 2.4142420993665727e-05, + "loss": 0.0426, + "step": 3769, + "task_loss": 0.072311170399189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831265464060888, + "compression_loss": 0.0, + "distillation_loss": 0.09386499226093292, + "epoch": 3.58, + "learning_rate": 2.4131772097036516e-05, + "loss": 0.0865, + "step": 3770, + "task_loss": 0.0198683962225914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783137418476369, + "compression_loss": 0.0, + "distillation_loss": 0.09301801770925522, + "epoch": 3.58, + "learning_rate": 2.4121123358125078e-05, + "loss": 0.1046, + "step": 3771, + "task_loss": 0.20893818140029907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831482858755127, + "compression_loss": 0.0, + "distillation_loss": 0.15907953679561615, + "epoch": 3.58, + "learning_rate": 2.4110474778865817e-05, + "loss": 0.1651, + "step": 3772, + "task_loss": 0.21910780668258667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831591486045236, + "compression_loss": 0.0, + "distillation_loss": 0.050729766488075256, + "epoch": 3.58, + "learning_rate": 2.4099826361193098e-05, + "loss": 0.0566, + "step": 3773, + "task_loss": 0.10932128876447678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831700066644055, + "compression_loss": 0.0, + "distillation_loss": 0.0897529125213623, + "epoch": 3.58, + "learning_rate": 2.4089178107041254e-05, + "loss": 0.0848, + "step": 3774, + "task_loss": 0.039870765060186386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831808600561619, + "compression_loss": 0.0, + "distillation_loss": 0.1213412880897522, + "epoch": 3.58, + "learning_rate": 2.407853001834459e-05, + "loss": 0.1197, + "step": 3775, + "task_loss": 0.10536501556634903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7831917087807966, + "compression_loss": 0.0, + "distillation_loss": 0.038229409605264664, + "epoch": 3.59, + "learning_rate": 2.4067882097037383e-05, + "loss": 0.0357, + "step": 3776, + "task_loss": 0.012635331600904465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832025528393133, + "compression_loss": 0.0, + "distillation_loss": 0.2114480882883072, + "epoch": 3.59, + "learning_rate": 2.4057234345053894e-05, + "loss": 0.204, + "step": 3777, + "task_loss": 0.13707035779953003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832133922327156, + "compression_loss": 0.0, + "distillation_loss": 0.03434763103723526, + "epoch": 3.59, + "learning_rate": 2.404658676432832e-05, + "loss": 0.0492, + "step": 3778, + "task_loss": 0.18263056874275208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832242269620071, + "compression_loss": 0.0, + "distillation_loss": 0.024286724627017975, + "epoch": 3.59, + "learning_rate": 2.403593935679485e-05, + "loss": 0.0226, + "step": 3779, + "task_loss": 0.0070180464535951614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832350570281917, + "compression_loss": 0.0, + "distillation_loss": 0.05927498638629913, + "epoch": 3.59, + "learning_rate": 2.402529212438765e-05, + "loss": 0.0679, + "step": 3780, + "task_loss": 0.1452222764492035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783245882432273, + "compression_loss": 0.0, + "distillation_loss": 0.03792211413383484, + "epoch": 3.59, + "learning_rate": 2.401464506904082e-05, + "loss": 0.0416, + "step": 3781, + "task_loss": 0.07421617209911346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832567031752546, + "compression_loss": 0.0, + "distillation_loss": 0.1570988893508911, + "epoch": 3.59, + "learning_rate": 2.400399819268846e-05, + "loss": 0.1591, + "step": 3782, + "task_loss": 0.1775505691766739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832675192581403, + "compression_loss": 0.0, + "distillation_loss": 0.019645625725388527, + "epoch": 3.59, + "learning_rate": 2.399335149726463e-05, + "loss": 0.0182, + "step": 3783, + "task_loss": 0.0054306890815496445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832783306819336, + "compression_loss": 0.0, + "distillation_loss": 0.0738186463713646, + "epoch": 3.59, + "learning_rate": 2.3982704984703337e-05, + "loss": 0.068, + "step": 3784, + "task_loss": 0.015383878722786903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832891374476385, + "compression_loss": 0.0, + "distillation_loss": 0.02696252055466175, + "epoch": 3.59, + "learning_rate": 2.3972058656938587e-05, + "loss": 0.0331, + "step": 3785, + "task_loss": 0.08802430331707001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7832999395562583, + "compression_loss": 0.0, + "distillation_loss": 0.05371105670928955, + "epoch": 3.6, + "learning_rate": 2.3961412515904336e-05, + "loss": 0.0574, + "step": 3786, + "task_loss": 0.09069317579269409 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783310737008797, + "compression_loss": 0.0, + "distillation_loss": 0.0783916711807251, + "epoch": 3.6, + "learning_rate": 2.3950766563534508e-05, + "loss": 0.0778, + "step": 3787, + "task_loss": 0.07237330079078674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833215298062581, + "compression_loss": 0.0, + "distillation_loss": 0.18467172980308533, + "epoch": 3.6, + "learning_rate": 2.394012080176298e-05, + "loss": 0.1865, + "step": 3788, + "task_loss": 0.20330506563186646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833323179496454, + "compression_loss": 0.0, + "distillation_loss": 0.014792121946811676, + "epoch": 3.6, + "learning_rate": 2.3929475232523612e-05, + "loss": 0.0138, + "step": 3789, + "task_loss": 0.004657162353396416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833431014399624, + "compression_loss": 0.0, + "distillation_loss": 0.023432932794094086, + "epoch": 3.6, + "learning_rate": 2.3918829857750233e-05, + "loss": 0.0217, + "step": 3790, + "task_loss": 0.0064360033720731735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783353880278213, + "compression_loss": 0.0, + "distillation_loss": 0.023829486221075058, + "epoch": 3.6, + "learning_rate": 2.3908184679376608e-05, + "loss": 0.0219, + "step": 3791, + "task_loss": 0.004044756293296814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833646544654007, + "compression_loss": 0.0, + "distillation_loss": 0.10334857553243637, + "epoch": 3.6, + "learning_rate": 2.3897539699336503e-05, + "loss": 0.1155, + "step": 3792, + "task_loss": 0.22499464452266693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833754240025294, + "compression_loss": 0.0, + "distillation_loss": 0.020630966871976852, + "epoch": 3.6, + "learning_rate": 2.3886894919563603e-05, + "loss": 0.0257, + "step": 3793, + "task_loss": 0.07151245325803757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833861888906025, + "compression_loss": 0.0, + "distillation_loss": 0.09565474092960358, + "epoch": 3.6, + "learning_rate": 2.387625034199162e-05, + "loss": 0.0906, + "step": 3794, + "task_loss": 0.044928256422281265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7833969491306239, + "compression_loss": 0.0, + "distillation_loss": 0.061953071504831314, + "epoch": 3.6, + "learning_rate": 2.3865605968554163e-05, + "loss": 0.0614, + "step": 3795, + "task_loss": 0.056104809045791626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7834077047235971, + "compression_loss": 0.0, + "distillation_loss": 0.050460297614336014, + "epoch": 3.6, + "learning_rate": 2.385496180118485e-05, + "loss": 0.0469, + "step": 3796, + "task_loss": 0.014742163941264153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783418455670526, + "compression_loss": 0.0, + "distillation_loss": 0.07291211187839508, + "epoch": 3.61, + "learning_rate": 2.3844317841817245e-05, + "loss": 0.0822, + "step": 3797, + "task_loss": 0.1659778207540512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7834292019724142, + "compression_loss": 0.0, + "distillation_loss": 0.21132490038871765, + "epoch": 3.61, + "learning_rate": 2.3833674092384864e-05, + "loss": 0.2126, + "step": 3798, + "task_loss": 0.22443267703056335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7834399436302653, + "compression_loss": 0.0, + "distillation_loss": 0.11052772402763367, + "epoch": 3.61, + "learning_rate": 2.3823030554821208e-05, + "loss": 0.1048, + "step": 3799, + "task_loss": 0.053448185324668884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783450680645083, + "compression_loss": 0.0, + "distillation_loss": 0.02982490137219429, + "epoch": 3.61, + "learning_rate": 2.3812387231059712e-05, + "loss": 0.028, + "step": 3800, + "task_loss": 0.011723890900611877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7834614130178711, + "compression_loss": 0.0, + "distillation_loss": 0.03554895892739296, + "epoch": 3.61, + "learning_rate": 2.38017441230338e-05, + "loss": 0.0389, + "step": 3801, + "task_loss": 0.0686713308095932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7834721407496331, + "compression_loss": 0.0, + "distillation_loss": 0.03221399337053299, + "epoch": 3.61, + "learning_rate": 2.379110123267683e-05, + "loss": 0.0301, + "step": 3802, + "task_loss": 0.011455537751317024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7834828638413729, + "compression_loss": 0.0, + "distillation_loss": 0.09096793085336685, + "epoch": 3.61, + "learning_rate": 2.3780458561922147e-05, + "loss": 0.0944, + "step": 3803, + "task_loss": 0.12568344175815582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783493582294094, + "compression_loss": 0.0, + "distillation_loss": 0.034135833382606506, + "epoch": 3.61, + "learning_rate": 2.3769816112703047e-05, + "loss": 0.0383, + "step": 3804, + "task_loss": 0.07552683353424072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835042961088001, + "compression_loss": 0.0, + "distillation_loss": 0.021872466430068016, + "epoch": 3.61, + "learning_rate": 2.375917388695277e-05, + "loss": 0.0203, + "step": 3805, + "task_loss": 0.005825823172926903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835150052864951, + "compression_loss": 0.0, + "distillation_loss": 0.10276195406913757, + "epoch": 3.61, + "learning_rate": 2.3748531886604537e-05, + "loss": 0.1085, + "step": 3806, + "task_loss": 0.15964293479919434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835257098281824, + "compression_loss": 0.0, + "distillation_loss": 0.0308642890304327, + "epoch": 3.62, + "learning_rate": 2.3737890113591507e-05, + "loss": 0.0476, + "step": 3807, + "task_loss": 0.1985035240650177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835364097348657, + "compression_loss": 0.0, + "distillation_loss": 0.037669505923986435, + "epoch": 3.62, + "learning_rate": 2.372724856984682e-05, + "loss": 0.0412, + "step": 3808, + "task_loss": 0.07280071079730988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783547105007549, + "compression_loss": 0.0, + "distillation_loss": 0.09298324584960938, + "epoch": 3.62, + "learning_rate": 2.3716607257303563e-05, + "loss": 0.0889, + "step": 3809, + "task_loss": 0.051655180752277374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835577956472356, + "compression_loss": 0.0, + "distillation_loss": 0.11761534959077835, + "epoch": 3.62, + "learning_rate": 2.370596617789476e-05, + "loss": 0.1226, + "step": 3810, + "task_loss": 0.1676085889339447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835684816549293, + "compression_loss": 0.0, + "distillation_loss": 0.11634726822376251, + "epoch": 3.62, + "learning_rate": 2.369532533355345e-05, + "loss": 0.1063, + "step": 3811, + "task_loss": 0.01633966900408268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783579163031634, + "compression_loss": 0.0, + "distillation_loss": 0.09321273863315582, + "epoch": 3.62, + "learning_rate": 2.3684684726212574e-05, + "loss": 0.0988, + "step": 3812, + "task_loss": 0.14873237907886505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7835898397783531, + "compression_loss": 0.0, + "distillation_loss": 0.0937819555401802, + "epoch": 3.62, + "learning_rate": 2.3674044357805058e-05, + "loss": 0.0936, + "step": 3813, + "task_loss": 0.0917680636048317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836005118960904, + "compression_loss": 0.0, + "distillation_loss": 0.13557323813438416, + "epoch": 3.62, + "learning_rate": 2.3663404230263764e-05, + "loss": 0.1304, + "step": 3814, + "task_loss": 0.08434079587459564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836111793858495, + "compression_loss": 0.0, + "distillation_loss": 0.11180786788463593, + "epoch": 3.62, + "learning_rate": 2.3652764345521527e-05, + "loss": 0.1161, + "step": 3815, + "task_loss": 0.15484829246997833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836218422486342, + "compression_loss": 0.0, + "distillation_loss": 0.03923030570149422, + "epoch": 3.62, + "learning_rate": 2.364212470551114e-05, + "loss": 0.0417, + "step": 3816, + "task_loss": 0.06417812407016754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836325004854481, + "compression_loss": 0.0, + "distillation_loss": 0.06536407768726349, + "epoch": 3.62, + "learning_rate": 2.3631485312165337e-05, + "loss": 0.0612, + "step": 3817, + "task_loss": 0.023885080590844154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783643154097295, + "compression_loss": 0.0, + "distillation_loss": 0.2747918665409088, + "epoch": 3.63, + "learning_rate": 2.3620846167416816e-05, + "loss": 0.2686, + "step": 3818, + "task_loss": 0.21282097697257996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836538030851784, + "compression_loss": 0.0, + "distillation_loss": 0.03365989029407501, + "epoch": 3.63, + "learning_rate": 2.3610207273198224e-05, + "loss": 0.0308, + "step": 3819, + "task_loss": 0.005554560571908951 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836644474501022, + "compression_loss": 0.0, + "distillation_loss": 0.015433688648045063, + "epoch": 3.63, + "learning_rate": 2.359956863144219e-05, + "loss": 0.0143, + "step": 3820, + "task_loss": 0.004542894661426544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836750871930699, + "compression_loss": 0.0, + "distillation_loss": 0.08182269334793091, + "epoch": 3.63, + "learning_rate": 2.358893024408125e-05, + "loss": 0.0829, + "step": 3821, + "task_loss": 0.09263917803764343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836857223150853, + "compression_loss": 0.0, + "distillation_loss": 0.010741151869297028, + "epoch": 3.63, + "learning_rate": 2.3578292113047927e-05, + "loss": 0.0144, + "step": 3822, + "task_loss": 0.0472935251891613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7836963528171519, + "compression_loss": 0.0, + "distillation_loss": 0.10976468026638031, + "epoch": 3.63, + "learning_rate": 2.3567654240274694e-05, + "loss": 0.1043, + "step": 3823, + "task_loss": 0.05536213517189026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837069787002735, + "compression_loss": 0.0, + "distillation_loss": 0.03287689387798309, + "epoch": 3.63, + "learning_rate": 2.355701662769396e-05, + "loss": 0.0514, + "step": 3824, + "task_loss": 0.21781839430332184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837175999654539, + "compression_loss": 0.0, + "distillation_loss": 0.03763230890035629, + "epoch": 3.63, + "learning_rate": 2.3546379277238107e-05, + "loss": 0.0392, + "step": 3825, + "task_loss": 0.05362531542778015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837282166136966, + "compression_loss": 0.0, + "distillation_loss": 0.05240745097398758, + "epoch": 3.63, + "learning_rate": 2.3535742190839464e-05, + "loss": 0.0651, + "step": 3826, + "task_loss": 0.17899471521377563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837388286460054, + "compression_loss": 0.0, + "distillation_loss": 0.03421130031347275, + "epoch": 3.63, + "learning_rate": 2.3525105370430296e-05, + "loss": 0.0388, + "step": 3827, + "task_loss": 0.07988087832927704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783749436063384, + "compression_loss": 0.0, + "distillation_loss": 0.036636196076869965, + "epoch": 3.64, + "learning_rate": 2.351446881794284e-05, + "loss": 0.0541, + "step": 3828, + "task_loss": 0.21091753244400024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837600388668359, + "compression_loss": 0.0, + "distillation_loss": 0.029964663088321686, + "epoch": 3.64, + "learning_rate": 2.3503832535309282e-05, + "loss": 0.032, + "step": 3829, + "task_loss": 0.05079011991620064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837706370573649, + "compression_loss": 0.0, + "distillation_loss": 0.03737621754407883, + "epoch": 3.64, + "learning_rate": 2.3493196524461754e-05, + "loss": 0.0481, + "step": 3830, + "task_loss": 0.14465513825416565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837812306359748, + "compression_loss": 0.0, + "distillation_loss": 0.05270380899310112, + "epoch": 3.64, + "learning_rate": 2.3482560787332325e-05, + "loss": 0.052, + "step": 3831, + "task_loss": 0.04599842429161072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7837918196036691, + "compression_loss": 0.0, + "distillation_loss": 0.09582822024822235, + "epoch": 3.64, + "learning_rate": 2.3471925325853043e-05, + "loss": 0.0926, + "step": 3832, + "task_loss": 0.06387031823396683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838024039614515, + "compression_loss": 0.0, + "distillation_loss": 0.04903242364525795, + "epoch": 3.64, + "learning_rate": 2.3461290141955886e-05, + "loss": 0.0461, + "step": 3833, + "task_loss": 0.019668804481625557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838129837103258, + "compression_loss": 0.0, + "distillation_loss": 0.03492574021220207, + "epoch": 3.64, + "learning_rate": 2.345065523757278e-05, + "loss": 0.0341, + "step": 3834, + "task_loss": 0.02714421972632408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838235588512955, + "compression_loss": 0.0, + "distillation_loss": 0.08957400172948837, + "epoch": 3.64, + "learning_rate": 2.3440020614635618e-05, + "loss": 0.0918, + "step": 3835, + "task_loss": 0.11195338517427444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838341293853646, + "compression_loss": 0.0, + "distillation_loss": 0.029562341049313545, + "epoch": 3.64, + "learning_rate": 2.342938627507621e-05, + "loss": 0.0338, + "step": 3836, + "task_loss": 0.07171028107404709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838446953135364, + "compression_loss": 0.0, + "distillation_loss": 0.022509558126330376, + "epoch": 3.64, + "learning_rate": 2.3418752220826364e-05, + "loss": 0.0208, + "step": 3837, + "task_loss": 0.004994381219148636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838552566368148, + "compression_loss": 0.0, + "distillation_loss": 0.16286085546016693, + "epoch": 3.64, + "learning_rate": 2.3408118453817786e-05, + "loss": 0.1811, + "step": 3838, + "task_loss": 0.3450527787208557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838658133562034, + "compression_loss": 0.0, + "distillation_loss": 0.09071379154920578, + "epoch": 3.65, + "learning_rate": 2.339748497598216e-05, + "loss": 0.0975, + "step": 3839, + "task_loss": 0.1586238443851471 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.783876365472706, + "compression_loss": 0.0, + "distillation_loss": 0.06195370852947235, + "epoch": 3.65, + "learning_rate": 2.338685178925111e-05, + "loss": 0.0588, + "step": 3840, + "task_loss": 0.030472123995423317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838869129873262, + "compression_loss": 0.0, + "distillation_loss": 0.12223900854587555, + "epoch": 3.65, + "learning_rate": 2.3376218895556196e-05, + "loss": 0.1351, + "step": 3841, + "task_loss": 0.2507644295692444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7838974559010676, + "compression_loss": 0.0, + "distillation_loss": 0.18645763397216797, + "epoch": 3.65, + "learning_rate": 2.3365586296828944e-05, + "loss": 0.1835, + "step": 3842, + "task_loss": 0.15729686617851257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839079942149341, + "compression_loss": 0.0, + "distillation_loss": 0.14159414172172546, + "epoch": 3.65, + "learning_rate": 2.335495399500081e-05, + "loss": 0.1349, + "step": 3843, + "task_loss": 0.07418064028024673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839185279299291, + "compression_loss": 0.0, + "distillation_loss": 0.04566461965441704, + "epoch": 3.65, + "learning_rate": 2.3344321992003206e-05, + "loss": 0.0428, + "step": 3844, + "task_loss": 0.017158547416329384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839290570470565, + "compression_loss": 0.0, + "distillation_loss": 0.15318387746810913, + "epoch": 3.65, + "learning_rate": 2.3333690289767477e-05, + "loss": 0.1488, + "step": 3845, + "task_loss": 0.10943318903446198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839395815673199, + "compression_loss": 0.0, + "distillation_loss": 0.026680761948227882, + "epoch": 3.65, + "learning_rate": 2.3323058890224938e-05, + "loss": 0.0248, + "step": 3846, + "task_loss": 0.008314013481140137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839501014917231, + "compression_loss": 0.0, + "distillation_loss": 0.03555554896593094, + "epoch": 3.65, + "learning_rate": 2.3312427795306835e-05, + "loss": 0.0487, + "step": 3847, + "task_loss": 0.16655853390693665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839606168212695, + "compression_loss": 0.0, + "distillation_loss": 0.03203408420085907, + "epoch": 3.65, + "learning_rate": 2.330179700694434e-05, + "loss": 0.044, + "step": 3848, + "task_loss": 0.15183991193771362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839711275569631, + "compression_loss": 0.0, + "distillation_loss": 0.047503869980573654, + "epoch": 3.66, + "learning_rate": 2.329116652706861e-05, + "loss": 0.0442, + "step": 3849, + "task_loss": 0.01422363892197609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839816336998073, + "compression_loss": 0.0, + "distillation_loss": 0.04679963365197182, + "epoch": 3.66, + "learning_rate": 2.3280536357610704e-05, + "loss": 0.0571, + "step": 3850, + "task_loss": 0.1500503420829773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7839921352508061, + "compression_loss": 0.0, + "distillation_loss": 0.020564064383506775, + "epoch": 3.66, + "learning_rate": 2.3269906500501647e-05, + "loss": 0.0253, + "step": 3851, + "task_loss": 0.06842130422592163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840026322109629, + "compression_loss": 0.0, + "distillation_loss": 0.08808699995279312, + "epoch": 3.66, + "learning_rate": 2.325927695767241e-05, + "loss": 0.0912, + "step": 3852, + "task_loss": 0.11908883601427078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840131245812815, + "compression_loss": 0.0, + "distillation_loss": 0.036189496517181396, + "epoch": 3.66, + "learning_rate": 2.3248647731053894e-05, + "loss": 0.0553, + "step": 3853, + "task_loss": 0.22739224135875702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840236123627656, + "compression_loss": 0.0, + "distillation_loss": 0.05552596598863602, + "epoch": 3.66, + "learning_rate": 2.3238018822576947e-05, + "loss": 0.0509, + "step": 3854, + "task_loss": 0.009532701224088669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840340955564189, + "compression_loss": 0.0, + "distillation_loss": 0.11583777517080307, + "epoch": 3.66, + "learning_rate": 2.3227390234172372e-05, + "loss": 0.11, + "step": 3855, + "task_loss": 0.05766517296433449 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840445741632449, + "compression_loss": 0.0, + "distillation_loss": 0.04743620380759239, + "epoch": 3.66, + "learning_rate": 2.32167619677709e-05, + "loss": 0.0638, + "step": 3856, + "task_loss": 0.2115105539560318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840550481842475, + "compression_loss": 0.0, + "distillation_loss": 0.035333409905433655, + "epoch": 3.66, + "learning_rate": 2.3206134025303206e-05, + "loss": 0.0433, + "step": 3857, + "task_loss": 0.11506626009941101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840655176204303, + "compression_loss": 0.0, + "distillation_loss": 0.06820636987686157, + "epoch": 3.66, + "learning_rate": 2.31955064086999e-05, + "loss": 0.0815, + "step": 3858, + "task_loss": 0.20162644982337952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784075982472797, + "compression_loss": 0.0, + "distillation_loss": 0.11444197595119476, + "epoch": 3.66, + "learning_rate": 2.3184879119891552e-05, + "loss": 0.1087, + "step": 3859, + "task_loss": 0.057123761624097824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840864427423512, + "compression_loss": 0.0, + "distillation_loss": 0.09845539927482605, + "epoch": 3.67, + "learning_rate": 2.317425216080865e-05, + "loss": 0.111, + "step": 3860, + "task_loss": 0.22417478263378143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7840968984300967, + "compression_loss": 0.0, + "distillation_loss": 0.1071721538901329, + "epoch": 3.67, + "learning_rate": 2.3163625533381636e-05, + "loss": 0.1044, + "step": 3861, + "task_loss": 0.0796838253736496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841073495370371, + "compression_loss": 0.0, + "distillation_loss": 0.023740757256746292, + "epoch": 3.67, + "learning_rate": 2.315299923954088e-05, + "loss": 0.0288, + "step": 3862, + "task_loss": 0.07385114580392838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841177960641761, + "compression_loss": 0.0, + "distillation_loss": 0.04698867350816727, + "epoch": 3.67, + "learning_rate": 2.314237328121672e-05, + "loss": 0.0501, + "step": 3863, + "task_loss": 0.07777606695890427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841282380125174, + "compression_loss": 0.0, + "distillation_loss": 0.03970280662178993, + "epoch": 3.67, + "learning_rate": 2.3131747660339394e-05, + "loss": 0.0383, + "step": 3864, + "task_loss": 0.026047592982649803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841386753830647, + "compression_loss": 0.0, + "distillation_loss": 0.03670245781540871, + "epoch": 3.67, + "learning_rate": 2.3121122378839106e-05, + "loss": 0.0432, + "step": 3865, + "task_loss": 0.10145539790391922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841491081768216, + "compression_loss": 0.0, + "distillation_loss": 0.028605319559574127, + "epoch": 3.67, + "learning_rate": 2.3110497438645987e-05, + "loss": 0.0363, + "step": 3866, + "task_loss": 0.10599753260612488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841595363947919, + "compression_loss": 0.0, + "distillation_loss": 0.11348830163478851, + "epoch": 3.67, + "learning_rate": 2.3099872841690103e-05, + "loss": 0.108, + "step": 3867, + "task_loss": 0.05827032029628754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841699600379791, + "compression_loss": 0.0, + "distillation_loss": 0.08587086945772171, + "epoch": 3.67, + "learning_rate": 2.308924858990147e-05, + "loss": 0.0806, + "step": 3868, + "task_loss": 0.033285610377788544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841803791073871, + "compression_loss": 0.0, + "distillation_loss": 0.08816444873809814, + "epoch": 3.67, + "learning_rate": 2.3078624685210042e-05, + "loss": 0.0914, + "step": 3869, + "task_loss": 0.12038688361644745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7841907936040196, + "compression_loss": 0.0, + "distillation_loss": 0.13361284136772156, + "epoch": 3.68, + "learning_rate": 2.3068001129545686e-05, + "loss": 0.1276, + "step": 3870, + "task_loss": 0.07371234893798828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78420120352888, + "compression_loss": 0.0, + "distillation_loss": 0.033696770668029785, + "epoch": 3.68, + "learning_rate": 2.305737792483822e-05, + "loss": 0.0399, + "step": 3871, + "task_loss": 0.09580248594284058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842116088829721, + "compression_loss": 0.0, + "distillation_loss": 0.023039262741804123, + "epoch": 3.68, + "learning_rate": 2.3046755073017416e-05, + "loss": 0.0341, + "step": 3872, + "task_loss": 0.13404729962348938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842220096672998, + "compression_loss": 0.0, + "distillation_loss": 0.014469185844063759, + "epoch": 3.68, + "learning_rate": 2.303613257601297e-05, + "loss": 0.0137, + "step": 3873, + "task_loss": 0.006705537438392639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842324058828666, + "compression_loss": 0.0, + "distillation_loss": 0.06789643317461014, + "epoch": 3.68, + "learning_rate": 2.302551043575449e-05, + "loss": 0.0626, + "step": 3874, + "task_loss": 0.015251386910676956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784242797530676, + "compression_loss": 0.0, + "distillation_loss": 0.04347585514187813, + "epoch": 3.68, + "learning_rate": 2.301488865417155e-05, + "loss": 0.0488, + "step": 3875, + "task_loss": 0.09680940955877304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842531846117321, + "compression_loss": 0.0, + "distillation_loss": 0.04327643662691116, + "epoch": 3.68, + "learning_rate": 2.3004267233193655e-05, + "loss": 0.0394, + "step": 3876, + "task_loss": 0.004441501572728157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842635671270383, + "compression_loss": 0.0, + "distillation_loss": 0.04019487649202347, + "epoch": 3.68, + "learning_rate": 2.2993646174750217e-05, + "loss": 0.0394, + "step": 3877, + "task_loss": 0.032011084258556366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842739450775983, + "compression_loss": 0.0, + "distillation_loss": 0.024311494082212448, + "epoch": 3.68, + "learning_rate": 2.2983025480770627e-05, + "loss": 0.026, + "step": 3878, + "task_loss": 0.040784791111946106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842843184644157, + "compression_loss": 0.0, + "distillation_loss": 0.1018083393573761, + "epoch": 3.68, + "learning_rate": 2.297240515318416e-05, + "loss": 0.106, + "step": 3879, + "task_loss": 0.14343193173408508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7842946872884945, + "compression_loss": 0.0, + "distillation_loss": 0.12023551762104034, + "epoch": 3.68, + "learning_rate": 2.2961785193920058e-05, + "loss": 0.1204, + "step": 3880, + "task_loss": 0.1214599758386612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843050515508381, + "compression_loss": 0.0, + "distillation_loss": 0.16024965047836304, + "epoch": 3.69, + "learning_rate": 2.2951165604907497e-05, + "loss": 0.1819, + "step": 3881, + "task_loss": 0.37690994143486023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843154112524503, + "compression_loss": 0.0, + "distillation_loss": 0.1132129579782486, + "epoch": 3.69, + "learning_rate": 2.2940546388075572e-05, + "loss": 0.1066, + "step": 3882, + "task_loss": 0.04658997803926468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843257663943348, + "compression_loss": 0.0, + "distillation_loss": 0.08600252866744995, + "epoch": 3.69, + "learning_rate": 2.2929927545353323e-05, + "loss": 0.0817, + "step": 3883, + "task_loss": 0.0432564802467823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843361169774952, + "compression_loss": 0.0, + "distillation_loss": 0.06563156843185425, + "epoch": 3.69, + "learning_rate": 2.2919309078669697e-05, + "loss": 0.0658, + "step": 3884, + "task_loss": 0.06688161194324493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843464630029351, + "compression_loss": 0.0, + "distillation_loss": 0.0378117635846138, + "epoch": 3.69, + "learning_rate": 2.2908690989953598e-05, + "loss": 0.0374, + "step": 3885, + "task_loss": 0.03329796344041824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843568044716585, + "compression_loss": 0.0, + "distillation_loss": 0.08495815098285675, + "epoch": 3.69, + "learning_rate": 2.2898073281133853e-05, + "loss": 0.0777, + "step": 3886, + "task_loss": 0.01194603368639946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843671413846687, + "compression_loss": 0.0, + "distillation_loss": 0.03463613986968994, + "epoch": 3.69, + "learning_rate": 2.2887455954139217e-05, + "loss": 0.0355, + "step": 3887, + "task_loss": 0.04286995902657509 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843774737429696, + "compression_loss": 0.0, + "distillation_loss": 0.05026715248823166, + "epoch": 3.69, + "learning_rate": 2.2876839010898377e-05, + "loss": 0.0459, + "step": 3888, + "task_loss": 0.00638798251748085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843878015475649, + "compression_loss": 0.0, + "distillation_loss": 0.062215156853199005, + "epoch": 3.69, + "learning_rate": 2.286622245333996e-05, + "loss": 0.0596, + "step": 3889, + "task_loss": 0.03652032092213631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7843981247994581, + "compression_loss": 0.0, + "distillation_loss": 0.041499149054288864, + "epoch": 3.69, + "learning_rate": 2.2855606283392516e-05, + "loss": 0.0464, + "step": 3890, + "task_loss": 0.09033425897359848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844084434996532, + "compression_loss": 0.0, + "distillation_loss": 0.04093802720308304, + "epoch": 3.7, + "learning_rate": 2.2844990502984513e-05, + "loss": 0.0507, + "step": 3891, + "task_loss": 0.13819225132465363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844187576491536, + "compression_loss": 0.0, + "distillation_loss": 0.09159182012081146, + "epoch": 3.7, + "learning_rate": 2.2834375114044375e-05, + "loss": 0.0887, + "step": 3892, + "task_loss": 0.06316046416759491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844290672489631, + "compression_loss": 0.0, + "distillation_loss": 0.15046623349189758, + "epoch": 3.7, + "learning_rate": 2.2823760118500415e-05, + "loss": 0.1502, + "step": 3893, + "task_loss": 0.14759182929992676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844393723000853, + "compression_loss": 0.0, + "distillation_loss": 0.018377002328634262, + "epoch": 3.7, + "learning_rate": 2.2813145518280914e-05, + "loss": 0.0169, + "step": 3894, + "task_loss": 0.003946490585803986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784449672803524, + "compression_loss": 0.0, + "distillation_loss": 0.09543585032224655, + "epoch": 3.7, + "learning_rate": 2.2802531315314065e-05, + "loss": 0.101, + "step": 3895, + "task_loss": 0.15144692361354828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844599687602828, + "compression_loss": 0.0, + "distillation_loss": 0.17009879648685455, + "epoch": 3.7, + "learning_rate": 2.279191751152798e-05, + "loss": 0.1709, + "step": 3896, + "task_loss": 0.17769216001033783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844702601713653, + "compression_loss": 0.0, + "distillation_loss": 0.11712504923343658, + "epoch": 3.7, + "learning_rate": 2.2781304108850706e-05, + "loss": 0.1132, + "step": 3897, + "task_loss": 0.07775254547595978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844805470377754, + "compression_loss": 0.0, + "distillation_loss": 0.04855842888355255, + "epoch": 3.7, + "learning_rate": 2.2770691109210235e-05, + "loss": 0.0595, + "step": 3898, + "task_loss": 0.1582665592432022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7844908293605166, + "compression_loss": 0.0, + "distillation_loss": 0.07292292267084122, + "epoch": 3.7, + "learning_rate": 2.2760078514534462e-05, + "loss": 0.0764, + "step": 3899, + "task_loss": 0.10792665183544159 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845011071405927, + "compression_loss": 0.0, + "distillation_loss": 0.10464300960302353, + "epoch": 3.7, + "learning_rate": 2.2749466326751213e-05, + "loss": 0.1044, + "step": 3900, + "task_loss": 0.10204927623271942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845113803790073, + "compression_loss": 0.0, + "distillation_loss": 0.03131140023469925, + "epoch": 3.7, + "learning_rate": 2.273885454778824e-05, + "loss": 0.0288, + "step": 3901, + "task_loss": 0.0059508830308914185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845216490767641, + "compression_loss": 0.0, + "distillation_loss": 0.050689488649368286, + "epoch": 3.71, + "learning_rate": 2.272824317957324e-05, + "loss": 0.0619, + "step": 3902, + "task_loss": 0.16312363743782043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845319132348668, + "compression_loss": 0.0, + "distillation_loss": 0.11499884724617004, + "epoch": 3.71, + "learning_rate": 2.2717632224033796e-05, + "loss": 0.1087, + "step": 3903, + "task_loss": 0.051606349647045135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845421728543192, + "compression_loss": 0.0, + "distillation_loss": 0.15508276224136353, + "epoch": 3.71, + "learning_rate": 2.2707021683097454e-05, + "loss": 0.1501, + "step": 3904, + "task_loss": 0.10518581420183182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845524279361247, + "compression_loss": 0.0, + "distillation_loss": 0.0731472373008728, + "epoch": 3.71, + "learning_rate": 2.2696411558691672e-05, + "loss": 0.072, + "step": 3905, + "task_loss": 0.06154020130634308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845626784812871, + "compression_loss": 0.0, + "distillation_loss": 0.08605273813009262, + "epoch": 3.71, + "learning_rate": 2.268580185274381e-05, + "loss": 0.0926, + "step": 3906, + "task_loss": 0.15165117383003235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845729244908103, + "compression_loss": 0.0, + "distillation_loss": 0.020624032244086266, + "epoch": 3.71, + "learning_rate": 2.2675192567181197e-05, + "loss": 0.019, + "step": 3907, + "task_loss": 0.004072193056344986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7845831659656977, + "compression_loss": 0.0, + "distillation_loss": 0.09648367762565613, + "epoch": 3.71, + "learning_rate": 2.2664583703931047e-05, + "loss": 0.0974, + "step": 3908, + "task_loss": 0.10594139993190765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784593402906953, + "compression_loss": 0.0, + "distillation_loss": 0.03631186485290527, + "epoch": 3.71, + "learning_rate": 2.265397526492052e-05, + "loss": 0.0487, + "step": 3909, + "task_loss": 0.16029351949691772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846036353155801, + "compression_loss": 0.0, + "distillation_loss": 0.10707633942365646, + "epoch": 3.71, + "learning_rate": 2.2643367252076684e-05, + "loss": 0.1087, + "step": 3910, + "task_loss": 0.12293732911348343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846138631925825, + "compression_loss": 0.0, + "distillation_loss": 0.027988499030470848, + "epoch": 3.71, + "learning_rate": 2.263275966732653e-05, + "loss": 0.0256, + "step": 3911, + "task_loss": 0.00423845648765564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784624086538964, + "compression_loss": 0.0, + "distillation_loss": 0.06167607381939888, + "epoch": 3.72, + "learning_rate": 2.262215251259699e-05, + "loss": 0.0595, + "step": 3912, + "task_loss": 0.04003376513719559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846343053557281, + "compression_loss": 0.0, + "distillation_loss": 0.10655433684587479, + "epoch": 3.72, + "learning_rate": 2.2611545789814898e-05, + "loss": 0.111, + "step": 3913, + "task_loss": 0.15064670145511627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846445196438786, + "compression_loss": 0.0, + "distillation_loss": 0.04435639828443527, + "epoch": 3.72, + "learning_rate": 2.2600939500907007e-05, + "loss": 0.0445, + "step": 3914, + "task_loss": 0.04605961963534355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846547294044193, + "compression_loss": 0.0, + "distillation_loss": 0.08209598064422607, + "epoch": 3.72, + "learning_rate": 2.2590333647800013e-05, + "loss": 0.0942, + "step": 3915, + "task_loss": 0.2029554843902588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846649346383536, + "compression_loss": 0.0, + "distillation_loss": 0.03713390976190567, + "epoch": 3.72, + "learning_rate": 2.2579728232420525e-05, + "loss": 0.0419, + "step": 3916, + "task_loss": 0.08507491648197174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846751353466855, + "compression_loss": 0.0, + "distillation_loss": 0.08504173159599304, + "epoch": 3.72, + "learning_rate": 2.2569123256695056e-05, + "loss": 0.0915, + "step": 3917, + "task_loss": 0.1496903896331787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846853315304185, + "compression_loss": 0.0, + "distillation_loss": 0.06336773186922073, + "epoch": 3.72, + "learning_rate": 2.2558518722550048e-05, + "loss": 0.0811, + "step": 3918, + "task_loss": 0.2407415211200714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7846955231905562, + "compression_loss": 0.0, + "distillation_loss": 0.08145183324813843, + "epoch": 3.72, + "learning_rate": 2.2547914631911884e-05, + "loss": 0.0866, + "step": 3919, + "task_loss": 0.1333352029323578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7847057103281025, + "compression_loss": 0.0, + "distillation_loss": 0.10730397701263428, + "epoch": 3.72, + "learning_rate": 2.2537310986706826e-05, + "loss": 0.1128, + "step": 3920, + "task_loss": 0.16203323006629944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784715892944061, + "compression_loss": 0.0, + "distillation_loss": 0.041547950357198715, + "epoch": 3.72, + "learning_rate": 2.2526707788861098e-05, + "loss": 0.0386, + "step": 3921, + "task_loss": 0.011634528636932373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7847260710394351, + "compression_loss": 0.0, + "distillation_loss": 0.06141505762934685, + "epoch": 3.72, + "learning_rate": 2.2516105040300804e-05, + "loss": 0.0588, + "step": 3922, + "task_loss": 0.03478226065635681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784736244615229, + "compression_loss": 0.0, + "distillation_loss": 0.06806781888008118, + "epoch": 3.73, + "learning_rate": 2.250550274295199e-05, + "loss": 0.0689, + "step": 3923, + "task_loss": 0.07671485841274261 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.784746413672446, + "compression_loss": 0.0, + "distillation_loss": 0.05792640894651413, + "epoch": 3.73, + "learning_rate": 2.2494900898740616e-05, + "loss": 0.0609, + "step": 3924, + "task_loss": 0.08743541687726974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78475657821209, + "compression_loss": 0.0, + "distillation_loss": 0.04159487783908844, + "epoch": 3.73, + "learning_rate": 2.2484299509592563e-05, + "loss": 0.0539, + "step": 3925, + "task_loss": 0.16499102115631104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7847667382351645, + "compression_loss": 0.0, + "distillation_loss": 0.038866378366947174, + "epoch": 3.73, + "learning_rate": 2.247369857743362e-05, + "loss": 0.0449, + "step": 3926, + "task_loss": 0.0994512215256691 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7847768937426732, + "compression_loss": 0.0, + "distillation_loss": 0.14317655563354492, + "epoch": 3.73, + "learning_rate": 2.2463098104189497e-05, + "loss": 0.1352, + "step": 3927, + "task_loss": 0.06382149457931519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78478704473562, + "compression_loss": 0.0, + "distillation_loss": 0.06667514145374298, + "epoch": 3.73, + "learning_rate": 2.2452498091785825e-05, + "loss": 0.0739, + "step": 3928, + "task_loss": 0.13881635665893555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7847971912150083, + "compression_loss": 0.0, + "distillation_loss": 0.015922335907816887, + "epoch": 3.73, + "learning_rate": 2.244189854214814e-05, + "loss": 0.0232, + "step": 3929, + "task_loss": 0.08892001956701279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848073331818419, + "compression_loss": 0.0, + "distillation_loss": 0.12270977348089218, + "epoch": 3.73, + "learning_rate": 2.24312994572019e-05, + "loss": 0.1286, + "step": 3930, + "task_loss": 0.1818765252828598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848174706371246, + "compression_loss": 0.0, + "distillation_loss": 0.09202725440263748, + "epoch": 3.73, + "learning_rate": 2.2420700838872493e-05, + "loss": 0.0922, + "step": 3931, + "task_loss": 0.09422404319047928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78482760358186, + "compression_loss": 0.0, + "distillation_loss": 0.22878801822662354, + "epoch": 3.73, + "learning_rate": 2.2410102689085185e-05, + "loss": 0.2324, + "step": 3932, + "task_loss": 0.2648504376411438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848377320170516, + "compression_loss": 0.0, + "distillation_loss": 0.03465723991394043, + "epoch": 3.74, + "learning_rate": 2.2399505009765214e-05, + "loss": 0.0383, + "step": 3933, + "task_loss": 0.07061752676963806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848478559437033, + "compression_loss": 0.0, + "distillation_loss": 0.046556901186704636, + "epoch": 3.74, + "learning_rate": 2.2388907802837676e-05, + "loss": 0.0425, + "step": 3934, + "task_loss": 0.005850574001669884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848579753628188, + "compression_loss": 0.0, + "distillation_loss": 0.02604757435619831, + "epoch": 3.74, + "learning_rate": 2.2378311070227616e-05, + "loss": 0.0313, + "step": 3935, + "task_loss": 0.07850679755210876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848680902754016, + "compression_loss": 0.0, + "distillation_loss": 0.032080747187137604, + "epoch": 3.74, + "learning_rate": 2.2367714813859967e-05, + "loss": 0.0366, + "step": 3936, + "task_loss": 0.07736522704362869 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848782006824555, + "compression_loss": 0.0, + "distillation_loss": 0.030702613294124603, + "epoch": 3.74, + "learning_rate": 2.23571190356596e-05, + "loss": 0.0332, + "step": 3937, + "task_loss": 0.05548100546002388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848883065849842, + "compression_loss": 0.0, + "distillation_loss": 0.06845727562904358, + "epoch": 3.74, + "learning_rate": 2.2346523737551296e-05, + "loss": 0.0701, + "step": 3938, + "task_loss": 0.08478248119354248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7848984079839914, + "compression_loss": 0.0, + "distillation_loss": 0.030870838090777397, + "epoch": 3.74, + "learning_rate": 2.233592892145973e-05, + "loss": 0.0387, + "step": 3939, + "task_loss": 0.10916754603385925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849085048804807, + "compression_loss": 0.0, + "distillation_loss": 0.1040521040558815, + "epoch": 3.74, + "learning_rate": 2.23253345893095e-05, + "loss": 0.1096, + "step": 3940, + "task_loss": 0.15946507453918457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849185972754558, + "compression_loss": 0.0, + "distillation_loss": 0.04144272953271866, + "epoch": 3.74, + "learning_rate": 2.231474074302513e-05, + "loss": 0.051, + "step": 3941, + "task_loss": 0.13734087347984314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849286851699203, + "compression_loss": 0.0, + "distillation_loss": 0.11080522835254669, + "epoch": 3.74, + "learning_rate": 2.2304147384531038e-05, + "loss": 0.115, + "step": 3942, + "task_loss": 0.15283827483654022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849387685648781, + "compression_loss": 0.0, + "distillation_loss": 0.07673658430576324, + "epoch": 3.74, + "learning_rate": 2.2293554515751552e-05, + "loss": 0.08, + "step": 3943, + "task_loss": 0.10954262316226959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849488474613326, + "compression_loss": 0.0, + "distillation_loss": 0.13339099287986755, + "epoch": 3.75, + "learning_rate": 2.2282962138610924e-05, + "loss": 0.1443, + "step": 3944, + "task_loss": 0.2422405332326889 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849589218602878, + "compression_loss": 0.0, + "distillation_loss": 0.05010555312037468, + "epoch": 3.75, + "learning_rate": 2.2272370255033314e-05, + "loss": 0.058, + "step": 3945, + "task_loss": 0.1292436569929123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849689917627473, + "compression_loss": 0.0, + "distillation_loss": 0.04166126996278763, + "epoch": 3.75, + "learning_rate": 2.226177886694278e-05, + "loss": 0.0422, + "step": 3946, + "task_loss": 0.04674288257956505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849790571697145, + "compression_loss": 0.0, + "distillation_loss": 0.05979571491479874, + "epoch": 3.75, + "learning_rate": 2.22511879762633e-05, + "loss": 0.0623, + "step": 3947, + "task_loss": 0.08474962413311005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849891180821934, + "compression_loss": 0.0, + "distillation_loss": 0.09109986573457718, + "epoch": 3.75, + "learning_rate": 2.2240597584918768e-05, + "loss": 0.0975, + "step": 3948, + "task_loss": 0.155453622341156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7849991745011875, + "compression_loss": 0.0, + "distillation_loss": 0.07806719839572906, + "epoch": 3.75, + "learning_rate": 2.2230007694832963e-05, + "loss": 0.0722, + "step": 3949, + "task_loss": 0.018949225544929504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850092264277005, + "compression_loss": 0.0, + "distillation_loss": 0.03633013740181923, + "epoch": 3.75, + "learning_rate": 2.2219418307929607e-05, + "loss": 0.0392, + "step": 3950, + "task_loss": 0.06507201492786407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850192738627363, + "compression_loss": 0.0, + "distillation_loss": 0.029751278460025787, + "epoch": 3.75, + "learning_rate": 2.2208829426132307e-05, + "loss": 0.0389, + "step": 3951, + "task_loss": 0.12085320800542831 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850293168072984, + "compression_loss": 0.0, + "distillation_loss": 0.056585509330034256, + "epoch": 3.75, + "learning_rate": 2.219824105136459e-05, + "loss": 0.0607, + "step": 3952, + "task_loss": 0.0976976677775383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850393552623904, + "compression_loss": 0.0, + "distillation_loss": 0.029133249074220657, + "epoch": 3.75, + "learning_rate": 2.218765318554987e-05, + "loss": 0.0329, + "step": 3953, + "task_loss": 0.06695835292339325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850493892290161, + "compression_loss": 0.0, + "distillation_loss": 0.04859749227762222, + "epoch": 3.75, + "learning_rate": 2.2177065830611498e-05, + "loss": 0.046, + "step": 3954, + "task_loss": 0.022643616423010826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850594187081792, + "compression_loss": 0.0, + "distillation_loss": 0.0373307503759861, + "epoch": 3.76, + "learning_rate": 2.2166478988472716e-05, + "loss": 0.0352, + "step": 3955, + "task_loss": 0.015915043652057648 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850694437008833, + "compression_loss": 0.0, + "distillation_loss": 0.05053357779979706, + "epoch": 3.76, + "learning_rate": 2.215589266105667e-05, + "loss": 0.0505, + "step": 3956, + "task_loss": 0.049804605543613434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850794642081321, + "compression_loss": 0.0, + "distillation_loss": 0.016781002283096313, + "epoch": 3.76, + "learning_rate": 2.2145306850286424e-05, + "loss": 0.0155, + "step": 3957, + "task_loss": 0.00426999107003212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850894802309294, + "compression_loss": 0.0, + "distillation_loss": 0.06677426397800446, + "epoch": 3.76, + "learning_rate": 2.2134721558084917e-05, + "loss": 0.0677, + "step": 3958, + "task_loss": 0.07622140645980835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7850994917702788, + "compression_loss": 0.0, + "distillation_loss": 0.031092319637537003, + "epoch": 3.76, + "learning_rate": 2.2124136786375057e-05, + "loss": 0.0348, + "step": 3959, + "task_loss": 0.06810992956161499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785109498827184, + "compression_loss": 0.0, + "distillation_loss": 0.04789033159613609, + "epoch": 3.76, + "learning_rate": 2.2113552537079597e-05, + "loss": 0.0456, + "step": 3960, + "task_loss": 0.025089185684919357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851195014026485, + "compression_loss": 0.0, + "distillation_loss": 0.03125907480716705, + "epoch": 3.76, + "learning_rate": 2.2102968812121218e-05, + "loss": 0.0292, + "step": 3961, + "task_loss": 0.011070974171161652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851294994976762, + "compression_loss": 0.0, + "distillation_loss": 0.0916096568107605, + "epoch": 3.76, + "learning_rate": 2.209238561342251e-05, + "loss": 0.0876, + "step": 3962, + "task_loss": 0.05148671194911003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851394931132707, + "compression_loss": 0.0, + "distillation_loss": 0.17331336438655853, + "epoch": 3.76, + "learning_rate": 2.2081802942905955e-05, + "loss": 0.1803, + "step": 3963, + "task_loss": 0.24269163608551025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851494822504358, + "compression_loss": 0.0, + "distillation_loss": 0.022669488564133644, + "epoch": 3.76, + "learning_rate": 2.2071220802493954e-05, + "loss": 0.0208, + "step": 3964, + "task_loss": 0.0038123298436403275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785159466910175, + "compression_loss": 0.0, + "distillation_loss": 0.07016949355602264, + "epoch": 3.77, + "learning_rate": 2.2060639194108794e-05, + "loss": 0.0671, + "step": 3965, + "task_loss": 0.039382204413414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785169447093492, + "compression_loss": 0.0, + "distillation_loss": 0.05281013250350952, + "epoch": 3.77, + "learning_rate": 2.2050058119672677e-05, + "loss": 0.0531, + "step": 3966, + "task_loss": 0.05565192550420761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851794228013906, + "compression_loss": 0.0, + "distillation_loss": 0.1318967640399933, + "epoch": 3.77, + "learning_rate": 2.2039477581107714e-05, + "loss": 0.1256, + "step": 3967, + "task_loss": 0.06887546926736832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851893940348745, + "compression_loss": 0.0, + "distillation_loss": 0.0660499557852745, + "epoch": 3.77, + "learning_rate": 2.2028897580335906e-05, + "loss": 0.0626, + "step": 3968, + "task_loss": 0.03172999620437622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7851993607949473, + "compression_loss": 0.0, + "distillation_loss": 0.03410178795456886, + "epoch": 3.77, + "learning_rate": 2.2018318119279168e-05, + "loss": 0.0312, + "step": 3969, + "task_loss": 0.004658637568354607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852093230826127, + "compression_loss": 0.0, + "distillation_loss": 0.16602616012096405, + "epoch": 3.77, + "learning_rate": 2.2007739199859294e-05, + "loss": 0.169, + "step": 3970, + "task_loss": 0.19587615132331848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852192808988743, + "compression_loss": 0.0, + "distillation_loss": 0.05224110186100006, + "epoch": 3.77, + "learning_rate": 2.1997160823998016e-05, + "loss": 0.0481, + "step": 3971, + "task_loss": 0.01118180900812149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852292342447359, + "compression_loss": 0.0, + "distillation_loss": 0.055276624858379364, + "epoch": 3.77, + "learning_rate": 2.1986582993616926e-05, + "loss": 0.0502, + "step": 3972, + "task_loss": 0.004159906879067421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852391831212011, + "compression_loss": 0.0, + "distillation_loss": 0.05159646272659302, + "epoch": 3.77, + "learning_rate": 2.1976005710637554e-05, + "loss": 0.0558, + "step": 3973, + "task_loss": 0.09320049732923508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852491275292737, + "compression_loss": 0.0, + "distillation_loss": 0.10250347852706909, + "epoch": 3.77, + "learning_rate": 2.196542897698131e-05, + "loss": 0.1108, + "step": 3974, + "task_loss": 0.185111865401268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852590674699573, + "compression_loss": 0.0, + "distillation_loss": 0.04058488458395004, + "epoch": 3.77, + "learning_rate": 2.1954852794569493e-05, + "loss": 0.0404, + "step": 3975, + "task_loss": 0.03915491700172424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852690029442555, + "compression_loss": 0.0, + "distillation_loss": 0.012665364891290665, + "epoch": 3.78, + "learning_rate": 2.194427716532334e-05, + "loss": 0.0117, + "step": 3976, + "task_loss": 0.0031289253383874893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852789339531722, + "compression_loss": 0.0, + "distillation_loss": 0.07077420502901077, + "epoch": 3.78, + "learning_rate": 2.193370209116396e-05, + "loss": 0.0676, + "step": 3977, + "task_loss": 0.038839004933834076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852888604977108, + "compression_loss": 0.0, + "distillation_loss": 0.039995912462472916, + "epoch": 3.78, + "learning_rate": 2.1923127574012365e-05, + "loss": 0.0426, + "step": 3978, + "task_loss": 0.06648865342140198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7852987825788752, + "compression_loss": 0.0, + "distillation_loss": 0.06262241303920746, + "epoch": 3.78, + "learning_rate": 2.1912553615789462e-05, + "loss": 0.0587, + "step": 3979, + "task_loss": 0.023578649386763573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853087001976691, + "compression_loss": 0.0, + "distillation_loss": 0.05083940178155899, + "epoch": 3.78, + "learning_rate": 2.190198021841606e-05, + "loss": 0.0518, + "step": 3980, + "task_loss": 0.05996212735772133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853186133550959, + "compression_loss": 0.0, + "distillation_loss": 0.0958191305398941, + "epoch": 3.78, + "learning_rate": 2.189140738381288e-05, + "loss": 0.0944, + "step": 3981, + "task_loss": 0.08185603469610214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853285220521596, + "compression_loss": 0.0, + "distillation_loss": 0.1051722839474678, + "epoch": 3.78, + "learning_rate": 2.188083511390051e-05, + "loss": 0.1112, + "step": 3982, + "task_loss": 0.1649647355079651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853384262898637, + "compression_loss": 0.0, + "distillation_loss": 0.10239297151565552, + "epoch": 3.78, + "learning_rate": 2.1870263410599464e-05, + "loss": 0.1246, + "step": 3983, + "task_loss": 0.3248556852340698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785348326069212, + "compression_loss": 0.0, + "distillation_loss": 0.04012586921453476, + "epoch": 3.78, + "learning_rate": 2.185969227583014e-05, + "loss": 0.0377, + "step": 3984, + "task_loss": 0.015774773433804512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853582213912081, + "compression_loss": 0.0, + "distillation_loss": 0.019525595009326935, + "epoch": 3.78, + "learning_rate": 2.1849121711512847e-05, + "loss": 0.0185, + "step": 3985, + "task_loss": 0.009618887677788734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853681122568558, + "compression_loss": 0.0, + "distillation_loss": 0.11984270811080933, + "epoch": 3.79, + "learning_rate": 2.1838551719567767e-05, + "loss": 0.1232, + "step": 3986, + "task_loss": 0.1538366973400116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853779986671586, + "compression_loss": 0.0, + "distillation_loss": 0.10699717700481415, + "epoch": 3.79, + "learning_rate": 2.1827982301914993e-05, + "loss": 0.1009, + "step": 3987, + "task_loss": 0.04600843787193298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853878806231203, + "compression_loss": 0.0, + "distillation_loss": 0.018371235579252243, + "epoch": 3.79, + "learning_rate": 2.1817413460474514e-05, + "loss": 0.0415, + "step": 3988, + "task_loss": 0.2499411404132843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7853977581257445, + "compression_loss": 0.0, + "distillation_loss": 0.07233263552188873, + "epoch": 3.79, + "learning_rate": 2.18068451971662e-05, + "loss": 0.0799, + "step": 3989, + "task_loss": 0.14828746020793915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785407631176035, + "compression_loss": 0.0, + "distillation_loss": 0.04718535393476486, + "epoch": 3.79, + "learning_rate": 2.1796277513909838e-05, + "loss": 0.0454, + "step": 3990, + "task_loss": 0.029316851869225502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854174997749954, + "compression_loss": 0.0, + "distillation_loss": 0.03916257619857788, + "epoch": 3.79, + "learning_rate": 2.1785710412625098e-05, + "loss": 0.0363, + "step": 3991, + "task_loss": 0.010277681052684784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854273639236293, + "compression_loss": 0.0, + "distillation_loss": 0.179625004529953, + "epoch": 3.79, + "learning_rate": 2.1775143895231533e-05, + "loss": 0.1678, + "step": 3992, + "task_loss": 0.0614137165248394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854372236229407, + "compression_loss": 0.0, + "distillation_loss": 0.11892496049404144, + "epoch": 3.79, + "learning_rate": 2.1764577963648614e-05, + "loss": 0.1157, + "step": 3993, + "task_loss": 0.08710844814777374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854470788739328, + "compression_loss": 0.0, + "distillation_loss": 0.04435303807258606, + "epoch": 3.79, + "learning_rate": 2.175401261979569e-05, + "loss": 0.0626, + "step": 3994, + "task_loss": 0.2271975576877594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854569296776097, + "compression_loss": 0.0, + "distillation_loss": 0.04650871828198433, + "epoch": 3.79, + "learning_rate": 2.1743447865592016e-05, + "loss": 0.0441, + "step": 3995, + "task_loss": 0.022587845101952553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785466776034975, + "compression_loss": 0.0, + "distillation_loss": 0.09561876952648163, + "epoch": 3.79, + "learning_rate": 2.173288370295671e-05, + "loss": 0.0919, + "step": 3996, + "task_loss": 0.058294668793678284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854766179470322, + "compression_loss": 0.0, + "distillation_loss": 0.054196953773498535, + "epoch": 3.8, + "learning_rate": 2.1722320133808815e-05, + "loss": 0.0555, + "step": 3997, + "task_loss": 0.06700082123279572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854864554147851, + "compression_loss": 0.0, + "distillation_loss": 0.09690621495246887, + "epoch": 3.8, + "learning_rate": 2.171175716006726e-05, + "loss": 0.1012, + "step": 3998, + "task_loss": 0.14022237062454224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7854962884392374, + "compression_loss": 0.0, + "distillation_loss": 0.04198819026350975, + "epoch": 3.8, + "learning_rate": 2.1701194783650846e-05, + "loss": 0.0455, + "step": 3999, + "task_loss": 0.07720094919204712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855061170213927, + "compression_loss": 0.0, + "distillation_loss": 0.12908399105072021, + "epoch": 3.8, + "learning_rate": 2.1690633006478293e-05, + "loss": 0.1321, + "step": 4000, + "task_loss": 0.15927539765834808 + }, + { + "epoch": 3.8, + "eval_accuracy": 0.8944954128440367, + "eval_loss": 0.41921311616897583, + "eval_runtime": 18.4061, + "eval_samples_per_second": 47.376, + "eval_steps_per_second": 5.922, + "step": 4000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855159411622548, + "compression_loss": 0.0, + "distillation_loss": 0.0658789873123169, + "epoch": 3.8, + "learning_rate": 2.1680071830468178e-05, + "loss": 0.0634, + "step": 4001, + "task_loss": 0.04087566211819649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855257608628273, + "compression_loss": 0.0, + "distillation_loss": 0.06824058294296265, + "epoch": 3.8, + "learning_rate": 2.166951125753902e-05, + "loss": 0.0721, + "step": 4002, + "task_loss": 0.10674886405467987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855355761241138, + "compression_loss": 0.0, + "distillation_loss": 0.12798132002353668, + "epoch": 3.8, + "learning_rate": 2.1658951289609174e-05, + "loss": 0.1246, + "step": 4003, + "task_loss": 0.09367252886295319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855453869471183, + "compression_loss": 0.0, + "distillation_loss": 0.15844711661338806, + "epoch": 3.8, + "learning_rate": 2.1648391928596917e-05, + "loss": 0.1684, + "step": 4004, + "task_loss": 0.25770193338394165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855551933328442, + "compression_loss": 0.0, + "distillation_loss": 0.029653755947947502, + "epoch": 3.8, + "learning_rate": 2.1637833176420417e-05, + "loss": 0.0273, + "step": 4005, + "task_loss": 0.005788305774331093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855649952822952, + "compression_loss": 0.0, + "distillation_loss": 0.07723814994096756, + "epoch": 3.8, + "learning_rate": 2.1627275034997704e-05, + "loss": 0.074, + "step": 4006, + "task_loss": 0.04511658847332001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785574792796475, + "compression_loss": 0.0, + "distillation_loss": 0.03552757203578949, + "epoch": 3.81, + "learning_rate": 2.161671750624673e-05, + "loss": 0.0364, + "step": 4007, + "task_loss": 0.04422738030552864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855845858763874, + "compression_loss": 0.0, + "distillation_loss": 0.02493412420153618, + "epoch": 3.81, + "learning_rate": 2.160616059208531e-05, + "loss": 0.0286, + "step": 4008, + "task_loss": 0.06140220910310745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7855943745230358, + "compression_loss": 0.0, + "distillation_loss": 0.024118445813655853, + "epoch": 3.81, + "learning_rate": 2.1595604294431158e-05, + "loss": 0.041, + "step": 4009, + "task_loss": 0.19306635856628418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856041587374243, + "compression_loss": 0.0, + "distillation_loss": 0.06833194196224213, + "epoch": 3.81, + "learning_rate": 2.1585048615201885e-05, + "loss": 0.0668, + "step": 4010, + "task_loss": 0.05287961661815643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856139385205563, + "compression_loss": 0.0, + "distillation_loss": 0.08782090991735458, + "epoch": 3.81, + "learning_rate": 2.1574493556314983e-05, + "loss": 0.1022, + "step": 4011, + "task_loss": 0.23139870166778564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856237138734354, + "compression_loss": 0.0, + "distillation_loss": 0.03765169531106949, + "epoch": 3.81, + "learning_rate": 2.1563939119687828e-05, + "loss": 0.0461, + "step": 4012, + "task_loss": 0.12255912274122238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856334847970656, + "compression_loss": 0.0, + "distillation_loss": 0.047839343547821045, + "epoch": 3.81, + "learning_rate": 2.155338530723767e-05, + "loss": 0.0597, + "step": 4013, + "task_loss": 0.16615822911262512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856432512924504, + "compression_loss": 0.0, + "distillation_loss": 0.0834595113992691, + "epoch": 3.81, + "learning_rate": 2.154283212088168e-05, + "loss": 0.0868, + "step": 4014, + "task_loss": 0.11699174344539642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856530133605933, + "compression_loss": 0.0, + "distillation_loss": 0.028070103377103806, + "epoch": 3.81, + "learning_rate": 2.1532279562536874e-05, + "loss": 0.0308, + "step": 4015, + "task_loss": 0.05518289655447006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856627710024984, + "compression_loss": 0.0, + "distillation_loss": 0.019197819754481316, + "epoch": 3.81, + "learning_rate": 2.1521727634120192e-05, + "loss": 0.0184, + "step": 4016, + "task_loss": 0.011533919721841812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785672524219169, + "compression_loss": 0.0, + "distillation_loss": 0.028306379914283752, + "epoch": 3.81, + "learning_rate": 2.151117633754844e-05, + "loss": 0.0421, + "step": 4017, + "task_loss": 0.1661168932914734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856822730116091, + "compression_loss": 0.0, + "distillation_loss": 0.026535285636782646, + "epoch": 3.82, + "learning_rate": 2.150062567473829e-05, + "loss": 0.0298, + "step": 4018, + "task_loss": 0.058951202780008316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7856920173808221, + "compression_loss": 0.0, + "distillation_loss": 0.040127944201231, + "epoch": 3.82, + "learning_rate": 2.1490075647606363e-05, + "loss": 0.0386, + "step": 4019, + "task_loss": 0.025223994627594948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857017573278118, + "compression_loss": 0.0, + "distillation_loss": 0.02355261519551277, + "epoch": 3.82, + "learning_rate": 2.1479526258069087e-05, + "loss": 0.0352, + "step": 4020, + "task_loss": 0.13954126834869385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857114928535818, + "compression_loss": 0.0, + "distillation_loss": 0.03306501358747482, + "epoch": 3.82, + "learning_rate": 2.1468977508042824e-05, + "loss": 0.0479, + "step": 4021, + "task_loss": 0.1817673295736313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785721223959136, + "compression_loss": 0.0, + "distillation_loss": 0.06848086416721344, + "epoch": 3.82, + "learning_rate": 2.14584293994438e-05, + "loss": 0.0723, + "step": 4022, + "task_loss": 0.10678940266370773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857309506454778, + "compression_loss": 0.0, + "distillation_loss": 0.03855840116739273, + "epoch": 3.82, + "learning_rate": 2.1447881934188134e-05, + "loss": 0.0358, + "step": 4023, + "task_loss": 0.010923238471150398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857406729136112, + "compression_loss": 0.0, + "distillation_loss": 0.021052051335573196, + "epoch": 3.82, + "learning_rate": 2.143733511419183e-05, + "loss": 0.0193, + "step": 4024, + "task_loss": 0.003061491996049881 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857503907645396, + "compression_loss": 0.0, + "distillation_loss": 0.10116279125213623, + "epoch": 3.82, + "learning_rate": 2.142678894137075e-05, + "loss": 0.1047, + "step": 4025, + "task_loss": 0.1363639235496521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857601041992668, + "compression_loss": 0.0, + "distillation_loss": 0.19848263263702393, + "epoch": 3.82, + "learning_rate": 2.1416243417640668e-05, + "loss": 0.1934, + "step": 4026, + "task_loss": 0.14759844541549683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857698132187965, + "compression_loss": 0.0, + "distillation_loss": 0.10897897183895111, + "epoch": 3.82, + "learning_rate": 2.1405698544917225e-05, + "loss": 0.1032, + "step": 4027, + "task_loss": 0.051134128123521805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857795178241324, + "compression_loss": 0.0, + "distillation_loss": 0.06620095670223236, + "epoch": 3.83, + "learning_rate": 2.139515432511596e-05, + "loss": 0.0624, + "step": 4028, + "task_loss": 0.0285488348454237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785789218016278, + "compression_loss": 0.0, + "distillation_loss": 0.1283935308456421, + "epoch": 3.83, + "learning_rate": 2.138461076015227e-05, + "loss": 0.1225, + "step": 4029, + "task_loss": 0.0697251558303833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7857989137962372, + "compression_loss": 0.0, + "distillation_loss": 0.05554177612066269, + "epoch": 3.83, + "learning_rate": 2.1374067851941445e-05, + "loss": 0.0517, + "step": 4030, + "task_loss": 0.017079105600714684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858086051650136, + "compression_loss": 0.0, + "distillation_loss": 0.015999089926481247, + "epoch": 3.83, + "learning_rate": 2.1363525602398666e-05, + "loss": 0.0233, + "step": 4031, + "task_loss": 0.08854260295629501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785818292123611, + "compression_loss": 0.0, + "distillation_loss": 0.12333665788173676, + "epoch": 3.83, + "learning_rate": 2.1352984013438964e-05, + "loss": 0.1226, + "step": 4032, + "task_loss": 0.11570969223976135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858279746730328, + "compression_loss": 0.0, + "distillation_loss": 0.015158230438828468, + "epoch": 3.83, + "learning_rate": 2.134244308697729e-05, + "loss": 0.0222, + "step": 4033, + "task_loss": 0.08555779606103897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858376528142829, + "compression_loss": 0.0, + "distillation_loss": 0.06008541211485863, + "epoch": 3.83, + "learning_rate": 2.133190282492844e-05, + "loss": 0.064, + "step": 4034, + "task_loss": 0.09927615523338318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858473265483649, + "compression_loss": 0.0, + "distillation_loss": 0.1671043038368225, + "epoch": 3.83, + "learning_rate": 2.1321363229207096e-05, + "loss": 0.1689, + "step": 4035, + "task_loss": 0.1853921115398407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858569958762825, + "compression_loss": 0.0, + "distillation_loss": 0.026313871145248413, + "epoch": 3.83, + "learning_rate": 2.131082430172785e-05, + "loss": 0.0243, + "step": 4036, + "task_loss": 0.006411956623196602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858666607990394, + "compression_loss": 0.0, + "distillation_loss": 0.07281780242919922, + "epoch": 3.83, + "learning_rate": 2.1300286044405135e-05, + "loss": 0.0689, + "step": 4037, + "task_loss": 0.033516231924295425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858763213176394, + "compression_loss": 0.0, + "distillation_loss": 0.04494217038154602, + "epoch": 3.83, + "learning_rate": 2.1289748459153283e-05, + "loss": 0.0421, + "step": 4038, + "task_loss": 0.016057439148426056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.785885977433086, + "compression_loss": 0.0, + "distillation_loss": 0.02288595214486122, + "epoch": 3.84, + "learning_rate": 2.1279211547886485e-05, + "loss": 0.0211, + "step": 4039, + "task_loss": 0.005246007815003395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7858956291463829, + "compression_loss": 0.0, + "distillation_loss": 0.041702110320329666, + "epoch": 3.84, + "learning_rate": 2.1268675312518833e-05, + "loss": 0.0515, + "step": 4040, + "task_loss": 0.1393231451511383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859052764585338, + "compression_loss": 0.0, + "distillation_loss": 0.034420765936374664, + "epoch": 3.84, + "learning_rate": 2.1258139754964283e-05, + "loss": 0.0314, + "step": 4041, + "task_loss": 0.004395313560962677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859149193705425, + "compression_loss": 0.0, + "distillation_loss": 0.057688407599925995, + "epoch": 3.84, + "learning_rate": 2.124760487713666e-05, + "loss": 0.0542, + "step": 4042, + "task_loss": 0.022908154875040054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859245578834124, + "compression_loss": 0.0, + "distillation_loss": 0.06689159572124481, + "epoch": 3.84, + "learning_rate": 2.1237070680949686e-05, + "loss": 0.0685, + "step": 4043, + "task_loss": 0.08265194296836853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859341919981475, + "compression_loss": 0.0, + "distillation_loss": 0.03234408050775528, + "epoch": 3.84, + "learning_rate": 2.122653716831694e-05, + "loss": 0.0304, + "step": 4044, + "task_loss": 0.012456223368644714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859438217157514, + "compression_loss": 0.0, + "distillation_loss": 0.046421635895967484, + "epoch": 3.84, + "learning_rate": 2.12160043411519e-05, + "loss": 0.0521, + "step": 4045, + "task_loss": 0.10356031358242035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859534470372276, + "compression_loss": 0.0, + "distillation_loss": 0.26599907875061035, + "epoch": 3.84, + "learning_rate": 2.120547220136789e-05, + "loss": 0.2447, + "step": 4046, + "task_loss": 0.053442008793354034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78596306796358, + "compression_loss": 0.0, + "distillation_loss": 0.06889566034078598, + "epoch": 3.84, + "learning_rate": 2.1194940750878132e-05, + "loss": 0.0723, + "step": 4047, + "task_loss": 0.10272115468978882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859726844958121, + "compression_loss": 0.0, + "distillation_loss": 0.03883613273501396, + "epoch": 3.84, + "learning_rate": 2.1184409991595713e-05, + "loss": 0.0432, + "step": 4048, + "task_loss": 0.0823683887720108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859822966349278, + "compression_loss": 0.0, + "distillation_loss": 0.0397968553006649, + "epoch": 3.85, + "learning_rate": 2.117387992543359e-05, + "loss": 0.0434, + "step": 4049, + "task_loss": 0.07577399909496307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7859919043819307, + "compression_loss": 0.0, + "distillation_loss": 0.17865076661109924, + "epoch": 3.85, + "learning_rate": 2.1163350554304613e-05, + "loss": 0.1735, + "step": 4050, + "task_loss": 0.12746202945709229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860015077378243, + "compression_loss": 0.0, + "distillation_loss": 0.02501235529780388, + "epoch": 3.85, + "learning_rate": 2.1152821880121482e-05, + "loss": 0.0232, + "step": 4051, + "task_loss": 0.007309248670935631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860111067036125, + "compression_loss": 0.0, + "distillation_loss": 0.07480224221944809, + "epoch": 3.85, + "learning_rate": 2.1142293904796783e-05, + "loss": 0.0842, + "step": 4052, + "task_loss": 0.16905774176120758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860207012802989, + "compression_loss": 0.0, + "distillation_loss": 0.04464121162891388, + "epoch": 3.85, + "learning_rate": 2.1131766630242966e-05, + "loss": 0.0478, + "step": 4053, + "task_loss": 0.07629342377185822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860302914688873, + "compression_loss": 0.0, + "distillation_loss": 0.023911893367767334, + "epoch": 3.85, + "learning_rate": 2.112124005837238e-05, + "loss": 0.0219, + "step": 4054, + "task_loss": 0.004040185362100601 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786039877270381, + "compression_loss": 0.0, + "distillation_loss": 0.017198091372847557, + "epoch": 3.85, + "learning_rate": 2.1110714191097222e-05, + "loss": 0.0159, + "step": 4055, + "task_loss": 0.004315854981541634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860494586857841, + "compression_loss": 0.0, + "distillation_loss": 0.09507577121257782, + "epoch": 3.85, + "learning_rate": 2.1100189030329558e-05, + "loss": 0.0988, + "step": 4056, + "task_loss": 0.1326824277639389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860590357161003, + "compression_loss": 0.0, + "distillation_loss": 0.03281257301568985, + "epoch": 3.85, + "learning_rate": 2.108966457798134e-05, + "loss": 0.0358, + "step": 4057, + "task_loss": 0.06237972527742386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860686083623328, + "compression_loss": 0.0, + "distillation_loss": 0.09298193454742432, + "epoch": 3.85, + "learning_rate": 2.107914083596438e-05, + "loss": 0.1066, + "step": 4058, + "task_loss": 0.22964733839035034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860781766254858, + "compression_loss": 0.0, + "distillation_loss": 0.10477671027183533, + "epoch": 3.85, + "learning_rate": 2.106861780619037e-05, + "loss": 0.0998, + "step": 4059, + "task_loss": 0.054976824671030045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860877405065628, + "compression_loss": 0.0, + "distillation_loss": 0.025005143135786057, + "epoch": 3.86, + "learning_rate": 2.105809549057088e-05, + "loss": 0.0288, + "step": 4060, + "task_loss": 0.06275138258934021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7860973000065674, + "compression_loss": 0.0, + "distillation_loss": 0.06488427519798279, + "epoch": 3.86, + "learning_rate": 2.1047573891017306e-05, + "loss": 0.0644, + "step": 4061, + "task_loss": 0.06053687259554863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861068551265034, + "compression_loss": 0.0, + "distillation_loss": 0.13458393514156342, + "epoch": 3.86, + "learning_rate": 2.103705300944099e-05, + "loss": 0.1369, + "step": 4062, + "task_loss": 0.15822012722492218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861164058673743, + "compression_loss": 0.0, + "distillation_loss": 0.18830344080924988, + "epoch": 3.86, + "learning_rate": 2.1026532847753068e-05, + "loss": 0.181, + "step": 4063, + "task_loss": 0.11519521474838257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786125952230184, + "compression_loss": 0.0, + "distillation_loss": 0.044180721044540405, + "epoch": 3.86, + "learning_rate": 2.1016013407864605e-05, + "loss": 0.0402, + "step": 4064, + "task_loss": 0.004225835204124451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861354942159361, + "compression_loss": 0.0, + "distillation_loss": 0.03563191741704941, + "epoch": 3.86, + "learning_rate": 2.1005494691686482e-05, + "loss": 0.0328, + "step": 4065, + "task_loss": 0.007603077217936516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861450318256342, + "compression_loss": 0.0, + "distillation_loss": 0.024195533245801926, + "epoch": 3.86, + "learning_rate": 2.0994976701129488e-05, + "loss": 0.0302, + "step": 4066, + "task_loss": 0.08473625034093857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861545650602821, + "compression_loss": 0.0, + "distillation_loss": 0.02827250212430954, + "epoch": 3.86, + "learning_rate": 2.098445943810427e-05, + "loss": 0.026, + "step": 4067, + "task_loss": 0.005987679585814476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861640939208835, + "compression_loss": 0.0, + "distillation_loss": 0.026403294876217842, + "epoch": 3.86, + "learning_rate": 2.0973942904521328e-05, + "loss": 0.0253, + "step": 4068, + "task_loss": 0.0155414380133152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786173618408442, + "compression_loss": 0.0, + "distillation_loss": 0.11837053298950195, + "epoch": 3.86, + "learning_rate": 2.096342710229105e-05, + "loss": 0.1148, + "step": 4069, + "task_loss": 0.08244297653436661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861831385239613, + "compression_loss": 0.0, + "distillation_loss": 0.04006524384021759, + "epoch": 3.87, + "learning_rate": 2.0952912033323672e-05, + "loss": 0.0377, + "step": 4070, + "task_loss": 0.016373053193092346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7861926542684451, + "compression_loss": 0.0, + "distillation_loss": 0.0364631786942482, + "epoch": 3.87, + "learning_rate": 2.0942397699529325e-05, + "loss": 0.0337, + "step": 4071, + "task_loss": 0.008914249017834663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786202165642897, + "compression_loss": 0.0, + "distillation_loss": 0.1414714902639389, + "epoch": 3.87, + "learning_rate": 2.0931884102817973e-05, + "loss": 0.1359, + "step": 4072, + "task_loss": 0.08563140779733658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862116726483208, + "compression_loss": 0.0, + "distillation_loss": 0.0371423065662384, + "epoch": 3.87, + "learning_rate": 2.0921371245099463e-05, + "loss": 0.0401, + "step": 4073, + "task_loss": 0.06720311939716339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862211752857201, + "compression_loss": 0.0, + "distillation_loss": 0.2344522476196289, + "epoch": 3.87, + "learning_rate": 2.0910859128283517e-05, + "loss": 0.2293, + "step": 4074, + "task_loss": 0.18332399427890778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862306735560987, + "compression_loss": 0.0, + "distillation_loss": 0.042274147272109985, + "epoch": 3.87, + "learning_rate": 2.0900347754279698e-05, + "loss": 0.0531, + "step": 4075, + "task_loss": 0.15049678087234497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.78624016746046, + "compression_loss": 0.0, + "distillation_loss": 0.045464303344488144, + "epoch": 3.87, + "learning_rate": 2.088983712499745e-05, + "loss": 0.043, + "step": 4076, + "task_loss": 0.02061353251338005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786249656999808, + "compression_loss": 0.0, + "distillation_loss": 0.13638713955879211, + "epoch": 3.87, + "learning_rate": 2.0879327242346093e-05, + "loss": 0.132, + "step": 4077, + "task_loss": 0.09256385266780853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862591421751463, + "compression_loss": 0.0, + "distillation_loss": 0.07257235050201416, + "epoch": 3.87, + "learning_rate": 2.0868818108234783e-05, + "loss": 0.078, + "step": 4078, + "task_loss": 0.12710356712341309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862686229874785, + "compression_loss": 0.0, + "distillation_loss": 0.1785210520029068, + "epoch": 3.87, + "learning_rate": 2.0858309724572554e-05, + "loss": 0.1706, + "step": 4079, + "task_loss": 0.09951774030923843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862780994378082, + "compression_loss": 0.0, + "distillation_loss": 0.04436881095170975, + "epoch": 3.87, + "learning_rate": 2.084780209326831e-05, + "loss": 0.0416, + "step": 4080, + "task_loss": 0.016401471570134163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862875715271395, + "compression_loss": 0.0, + "distillation_loss": 0.026207074522972107, + "epoch": 3.88, + "learning_rate": 2.0837295216230826e-05, + "loss": 0.0242, + "step": 4081, + "task_loss": 0.006385818123817444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7862970392564755, + "compression_loss": 0.0, + "distillation_loss": 0.07042695581912994, + "epoch": 3.88, + "learning_rate": 2.0826789095368705e-05, + "loss": 0.0771, + "step": 4082, + "task_loss": 0.1371847689151764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863065026268203, + "compression_loss": 0.0, + "distillation_loss": 0.09259558469057083, + "epoch": 3.88, + "learning_rate": 2.081628373259044e-05, + "loss": 0.097, + "step": 4083, + "task_loss": 0.13651143014431 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863159616391775, + "compression_loss": 0.0, + "distillation_loss": 0.1024775579571724, + "epoch": 3.88, + "learning_rate": 2.0805779129804397e-05, + "loss": 0.0936, + "step": 4084, + "task_loss": 0.01337103545665741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863254162945507, + "compression_loss": 0.0, + "distillation_loss": 0.0833493024110794, + "epoch": 3.88, + "learning_rate": 2.0795275288918763e-05, + "loss": 0.0871, + "step": 4085, + "task_loss": 0.12135336548089981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863348665939435, + "compression_loss": 0.0, + "distillation_loss": 0.03871447220444679, + "epoch": 3.88, + "learning_rate": 2.0784772211841624e-05, + "loss": 0.0447, + "step": 4086, + "task_loss": 0.09859400987625122 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863443125383598, + "compression_loss": 0.0, + "distillation_loss": 0.03221891075372696, + "epoch": 3.88, + "learning_rate": 2.077426990048091e-05, + "loss": 0.0301, + "step": 4087, + "task_loss": 0.011213263496756554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863537541288032, + "compression_loss": 0.0, + "distillation_loss": 0.02990700677037239, + "epoch": 3.88, + "learning_rate": 2.0763768356744428e-05, + "loss": 0.0282, + "step": 4088, + "task_loss": 0.012575274333357811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863631913662773, + "compression_loss": 0.0, + "distillation_loss": 0.042817167937755585, + "epoch": 3.88, + "learning_rate": 2.075326758253982e-05, + "loss": 0.0542, + "step": 4089, + "task_loss": 0.15655487775802612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863726242517859, + "compression_loss": 0.0, + "distillation_loss": 0.050644587725400925, + "epoch": 3.88, + "learning_rate": 2.0742767579774615e-05, + "loss": 0.0495, + "step": 4090, + "task_loss": 0.03952382132411003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7863820527863326, + "compression_loss": 0.0, + "distillation_loss": 0.10132403671741486, + "epoch": 3.89, + "learning_rate": 2.073226835035618e-05, + "loss": 0.105, + "step": 4091, + "task_loss": 0.13759130239486694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786391476970921, + "compression_loss": 0.0, + "distillation_loss": 0.05861423537135124, + "epoch": 3.89, + "learning_rate": 2.0721769896191752e-05, + "loss": 0.066, + "step": 4092, + "task_loss": 0.13297078013420105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864008968065551, + "compression_loss": 0.0, + "distillation_loss": 0.02992338128387928, + "epoch": 3.89, + "learning_rate": 2.0711272219188423e-05, + "loss": 0.0456, + "step": 4093, + "task_loss": 0.18631944060325623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864103122942382, + "compression_loss": 0.0, + "distillation_loss": 0.10860427469015121, + "epoch": 3.89, + "learning_rate": 2.0700775321253158e-05, + "loss": 0.1143, + "step": 4094, + "task_loss": 0.16594167053699493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864197234349742, + "compression_loss": 0.0, + "distillation_loss": 0.08554205298423767, + "epoch": 3.89, + "learning_rate": 2.0690279204292753e-05, + "loss": 0.1151, + "step": 4095, + "task_loss": 0.3806484341621399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864291302297668, + "compression_loss": 0.0, + "distillation_loss": 0.0441209115087986, + "epoch": 3.89, + "learning_rate": 2.0679783870213883e-05, + "loss": 0.0408, + "step": 4096, + "task_loss": 0.011142965406179428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864385326796196, + "compression_loss": 0.0, + "distillation_loss": 0.05388110131025314, + "epoch": 3.89, + "learning_rate": 2.0669289320923086e-05, + "loss": 0.049, + "step": 4097, + "task_loss": 0.004679244011640549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864479307855363, + "compression_loss": 0.0, + "distillation_loss": 0.033240221440792084, + "epoch": 3.89, + "learning_rate": 2.0658795558326743e-05, + "loss": 0.031, + "step": 4098, + "task_loss": 0.011169865727424622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864573245485206, + "compression_loss": 0.0, + "distillation_loss": 0.06061680614948273, + "epoch": 3.89, + "learning_rate": 2.0648302584331092e-05, + "loss": 0.0627, + "step": 4099, + "task_loss": 0.08101128786802292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864667139695761, + "compression_loss": 0.0, + "distillation_loss": 0.12393200397491455, + "epoch": 3.89, + "learning_rate": 2.0637810400842233e-05, + "loss": 0.1224, + "step": 4100, + "task_loss": 0.10840637236833572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864760990497066, + "compression_loss": 0.0, + "distillation_loss": 0.05604955554008484, + "epoch": 3.89, + "learning_rate": 2.0627319009766127e-05, + "loss": 0.0707, + "step": 4101, + "task_loss": 0.20234975218772888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864854797899157, + "compression_loss": 0.0, + "distillation_loss": 0.03791045770049095, + "epoch": 3.9, + "learning_rate": 2.0616828413008578e-05, + "loss": 0.0351, + "step": 4102, + "task_loss": 0.009459014981985092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7864948561912071, + "compression_loss": 0.0, + "distillation_loss": 0.01791505143046379, + "epoch": 3.9, + "learning_rate": 2.0606338612475264e-05, + "loss": 0.0255, + "step": 4103, + "task_loss": 0.09366492927074432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865042282545845, + "compression_loss": 0.0, + "distillation_loss": 0.14737428724765778, + "epoch": 3.9, + "learning_rate": 2.0595849610071697e-05, + "loss": 0.1548, + "step": 4104, + "task_loss": 0.22147542238235474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865135959810516, + "compression_loss": 0.0, + "distillation_loss": 0.04210761934518814, + "epoch": 3.9, + "learning_rate": 2.058536140770325e-05, + "loss": 0.0384, + "step": 4105, + "task_loss": 0.004892520606517792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865229593716121, + "compression_loss": 0.0, + "distillation_loss": 0.03413204848766327, + "epoch": 3.9, + "learning_rate": 2.057487400727517e-05, + "loss": 0.0316, + "step": 4106, + "task_loss": 0.008931750431656837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865323184272696, + "compression_loss": 0.0, + "distillation_loss": 0.014508235268294811, + "epoch": 3.9, + "learning_rate": 2.0564387410692544e-05, + "loss": 0.0136, + "step": 4107, + "task_loss": 0.00587865523993969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865416731490278, + "compression_loss": 0.0, + "distillation_loss": 0.1767938733100891, + "epoch": 3.9, + "learning_rate": 2.0553901619860306e-05, + "loss": 0.1712, + "step": 4108, + "task_loss": 0.1211482584476471 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865510235378904, + "compression_loss": 0.0, + "distillation_loss": 0.11559472978115082, + "epoch": 3.9, + "learning_rate": 2.0543416636683246e-05, + "loss": 0.1049, + "step": 4109, + "task_loss": 0.008573394268751144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865603695948611, + "compression_loss": 0.0, + "distillation_loss": 0.046687278896570206, + "epoch": 3.9, + "learning_rate": 2.0532932463066023e-05, + "loss": 0.0461, + "step": 4110, + "task_loss": 0.04126298427581787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865697113209436, + "compression_loss": 0.0, + "distillation_loss": 0.06493042409420013, + "epoch": 3.9, + "learning_rate": 2.052244910091313e-05, + "loss": 0.0873, + "step": 4111, + "task_loss": 0.28819745779037476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865790487171415, + "compression_loss": 0.0, + "distillation_loss": 0.10494603216648102, + "epoch": 3.91, + "learning_rate": 2.051196655212892e-05, + "loss": 0.0967, + "step": 4112, + "task_loss": 0.022337011992931366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865883817844586, + "compression_loss": 0.0, + "distillation_loss": 0.12292291969060898, + "epoch": 3.91, + "learning_rate": 2.0501484818617594e-05, + "loss": 0.1399, + "step": 4113, + "task_loss": 0.29296374320983887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7865977105238985, + "compression_loss": 0.0, + "distillation_loss": 0.043324559926986694, + "epoch": 3.91, + "learning_rate": 2.0491003902283225e-05, + "loss": 0.0394, + "step": 4114, + "task_loss": 0.004396416246891022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866070349364648, + "compression_loss": 0.0, + "distillation_loss": 0.05673076957464218, + "epoch": 3.91, + "learning_rate": 2.048052380502971e-05, + "loss": 0.0565, + "step": 4115, + "task_loss": 0.054648954421281815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866163550231614, + "compression_loss": 0.0, + "distillation_loss": 0.023540731519460678, + "epoch": 3.91, + "learning_rate": 2.047004452876081e-05, + "loss": 0.0216, + "step": 4116, + "task_loss": 0.004315689206123352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866256707849918, + "compression_loss": 0.0, + "distillation_loss": 0.09178947657346725, + "epoch": 3.91, + "learning_rate": 2.045956607538015e-05, + "loss": 0.0978, + "step": 4117, + "task_loss": 0.15143872797489166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866349822229597, + "compression_loss": 0.0, + "distillation_loss": 0.2266932725906372, + "epoch": 3.91, + "learning_rate": 2.0449088446791165e-05, + "loss": 0.2216, + "step": 4118, + "task_loss": 0.17611849308013916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866442893380688, + "compression_loss": 0.0, + "distillation_loss": 0.027620844542980194, + "epoch": 3.91, + "learning_rate": 2.043861164489719e-05, + "loss": 0.0354, + "step": 4119, + "task_loss": 0.1055789515376091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786653592131323, + "compression_loss": 0.0, + "distillation_loss": 0.0539475753903389, + "epoch": 3.91, + "learning_rate": 2.0428135671601373e-05, + "loss": 0.0601, + "step": 4120, + "task_loss": 0.11543746292591095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866628906037256, + "compression_loss": 0.0, + "distillation_loss": 0.047250404953956604, + "epoch": 3.91, + "learning_rate": 2.041766052880673e-05, + "loss": 0.0593, + "step": 4121, + "task_loss": 0.167374387383461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866721847562806, + "compression_loss": 0.0, + "distillation_loss": 0.05512014403939247, + "epoch": 3.91, + "learning_rate": 2.0407186218416114e-05, + "loss": 0.0648, + "step": 4122, + "task_loss": 0.15170332789421082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7866814745899915, + "compression_loss": 0.0, + "distillation_loss": 0.03062654659152031, + "epoch": 3.92, + "learning_rate": 2.039671274233225e-05, + "loss": 0.0388, + "step": 4123, + "task_loss": 0.11201991140842438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786690760105862, + "compression_loss": 0.0, + "distillation_loss": 0.01795804500579834, + "epoch": 3.92, + "learning_rate": 2.0386240102457682e-05, + "loss": 0.0166, + "step": 4124, + "task_loss": 0.004647407680749893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867000413048959, + "compression_loss": 0.0, + "distillation_loss": 0.08352886140346527, + "epoch": 3.92, + "learning_rate": 2.0375768300694824e-05, + "loss": 0.0783, + "step": 4125, + "task_loss": 0.030838267877697945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867093181880968, + "compression_loss": 0.0, + "distillation_loss": 0.16173683106899261, + "epoch": 3.92, + "learning_rate": 2.0365297338945917e-05, + "loss": 0.1537, + "step": 4126, + "task_loss": 0.08173392713069916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867185907564682, + "compression_loss": 0.0, + "distillation_loss": 0.05559203773736954, + "epoch": 3.92, + "learning_rate": 2.035482721911308e-05, + "loss": 0.0688, + "step": 4127, + "task_loss": 0.18805257976055145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867278590110142, + "compression_loss": 0.0, + "distillation_loss": 0.03582768887281418, + "epoch": 3.92, + "learning_rate": 2.034435794309824e-05, + "loss": 0.0388, + "step": 4128, + "task_loss": 0.06604026257991791 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867371229527381, + "compression_loss": 0.0, + "distillation_loss": 0.042852409183979034, + "epoch": 3.92, + "learning_rate": 2.0333889512803204e-05, + "loss": 0.0475, + "step": 4129, + "task_loss": 0.08887660503387451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867463825826438, + "compression_loss": 0.0, + "distillation_loss": 0.10272371768951416, + "epoch": 3.92, + "learning_rate": 2.0323421930129617e-05, + "loss": 0.0932, + "step": 4130, + "task_loss": 0.007061410695314407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867556379017349, + "compression_loss": 0.0, + "distillation_loss": 0.15910032391548157, + "epoch": 3.92, + "learning_rate": 2.031295519697895e-05, + "loss": 0.1638, + "step": 4131, + "task_loss": 0.2062944769859314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786764888911015, + "compression_loss": 0.0, + "distillation_loss": 0.15020066499710083, + "epoch": 3.92, + "learning_rate": 2.0302489315252545e-05, + "loss": 0.17, + "step": 4132, + "task_loss": 0.3483584523200989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786774135611488, + "compression_loss": 0.0, + "distillation_loss": 0.07658859342336655, + "epoch": 3.92, + "learning_rate": 2.0292024286851584e-05, + "loss": 0.0723, + "step": 4133, + "task_loss": 0.03387470170855522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867833780041574, + "compression_loss": 0.0, + "distillation_loss": 0.06731578707695007, + "epoch": 3.93, + "learning_rate": 2.0281560113677086e-05, + "loss": 0.0683, + "step": 4134, + "task_loss": 0.07722228765487671 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7867926160900269, + "compression_loss": 0.0, + "distillation_loss": 0.03083537518978119, + "epoch": 3.93, + "learning_rate": 2.0271096797629915e-05, + "loss": 0.0292, + "step": 4135, + "task_loss": 0.014749417081475258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868018498701004, + "compression_loss": 0.0, + "distillation_loss": 0.055131688714027405, + "epoch": 3.93, + "learning_rate": 2.0260634340610786e-05, + "loss": 0.0589, + "step": 4136, + "task_loss": 0.09324628859758377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868110793453812, + "compression_loss": 0.0, + "distillation_loss": 0.06215892732143402, + "epoch": 3.93, + "learning_rate": 2.0250172744520258e-05, + "loss": 0.0583, + "step": 4137, + "task_loss": 0.023460306227207184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868203045168732, + "compression_loss": 0.0, + "distillation_loss": 0.11791975051164627, + "epoch": 3.93, + "learning_rate": 2.023971201125872e-05, + "loss": 0.1233, + "step": 4138, + "task_loss": 0.17152175307273865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868295253855802, + "compression_loss": 0.0, + "distillation_loss": 0.03688070923089981, + "epoch": 3.93, + "learning_rate": 2.0229252142726415e-05, + "loss": 0.0461, + "step": 4139, + "task_loss": 0.12953397631645203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868387419525057, + "compression_loss": 0.0, + "distillation_loss": 0.059014152735471725, + "epoch": 3.93, + "learning_rate": 2.021879314082344e-05, + "loss": 0.0663, + "step": 4140, + "task_loss": 0.13181355595588684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868479542186534, + "compression_loss": 0.0, + "distillation_loss": 0.31541213393211365, + "epoch": 3.93, + "learning_rate": 2.0208335007449726e-05, + "loss": 0.3036, + "step": 4141, + "task_loss": 0.19743433594703674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786857162185027, + "compression_loss": 0.0, + "distillation_loss": 0.029475336894392967, + "epoch": 3.93, + "learning_rate": 2.019787774450503e-05, + "loss": 0.0269, + "step": 4142, + "task_loss": 0.004132760688662529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868663658526301, + "compression_loss": 0.0, + "distillation_loss": 0.13314858078956604, + "epoch": 3.93, + "learning_rate": 2.0187421353888966e-05, + "loss": 0.1294, + "step": 4143, + "task_loss": 0.09587833285331726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868755652224667, + "compression_loss": 0.0, + "distillation_loss": 0.05543944984674454, + "epoch": 3.94, + "learning_rate": 2.0176965837500995e-05, + "loss": 0.0528, + "step": 4144, + "task_loss": 0.029170924797654152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868847602955401, + "compression_loss": 0.0, + "distillation_loss": 0.031388137489557266, + "epoch": 3.94, + "learning_rate": 2.0166511197240405e-05, + "loss": 0.0293, + "step": 4145, + "task_loss": 0.01011231541633606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7868939510728542, + "compression_loss": 0.0, + "distillation_loss": 0.1392134577035904, + "epoch": 3.94, + "learning_rate": 2.015605743500634e-05, + "loss": 0.1419, + "step": 4146, + "task_loss": 0.16592922806739807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869031375554126, + "compression_loss": 0.0, + "distillation_loss": 0.04586111381649971, + "epoch": 3.94, + "learning_rate": 2.0145604552697763e-05, + "loss": 0.0487, + "step": 4147, + "task_loss": 0.07397586107254028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.786912319744219, + "compression_loss": 0.0, + "distillation_loss": 0.030558858066797256, + "epoch": 3.94, + "learning_rate": 2.0135152552213493e-05, + "loss": 0.0542, + "step": 4148, + "task_loss": 0.2674226462841034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869214976402772, + "compression_loss": 0.0, + "distillation_loss": 0.1152239590883255, + "epoch": 3.94, + "learning_rate": 2.0124701435452198e-05, + "loss": 0.1043, + "step": 4149, + "task_loss": 0.005567222833633423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869306712445906, + "compression_loss": 0.0, + "distillation_loss": 0.15521375834941864, + "epoch": 3.94, + "learning_rate": 2.0114251204312367e-05, + "loss": 0.1738, + "step": 4150, + "task_loss": 0.3411465287208557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869398405581631, + "compression_loss": 0.0, + "distillation_loss": 0.07683128863573074, + "epoch": 3.94, + "learning_rate": 2.010380186069234e-05, + "loss": 0.0698, + "step": 4151, + "task_loss": 0.0065435003489255905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869490055819984, + "compression_loss": 0.0, + "distillation_loss": 0.07263028621673584, + "epoch": 3.94, + "learning_rate": 2.009335340649028e-05, + "loss": 0.0779, + "step": 4152, + "task_loss": 0.12566472589969635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869581663171001, + "compression_loss": 0.0, + "distillation_loss": 0.10479018092155457, + "epoch": 3.94, + "learning_rate": 2.0082905843604206e-05, + "loss": 0.1073, + "step": 4153, + "task_loss": 0.13012909889221191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869673227644719, + "compression_loss": 0.0, + "distillation_loss": 0.0787314772605896, + "epoch": 3.94, + "learning_rate": 2.0072459173931964e-05, + "loss": 0.0745, + "step": 4154, + "task_loss": 0.03596143424510956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869764749251175, + "compression_loss": 0.0, + "distillation_loss": 0.02369462139904499, + "epoch": 3.95, + "learning_rate": 2.0062013399371245e-05, + "loss": 0.022, + "step": 4155, + "task_loss": 0.007038429379463196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869856228000405, + "compression_loss": 0.0, + "distillation_loss": 0.018353287130594254, + "epoch": 3.95, + "learning_rate": 2.005156852181958e-05, + "loss": 0.0253, + "step": 4156, + "task_loss": 0.08732615411281586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7869947663902447, + "compression_loss": 0.0, + "distillation_loss": 0.026407258585095406, + "epoch": 3.95, + "learning_rate": 2.004112454317431e-05, + "loss": 0.0248, + "step": 4157, + "task_loss": 0.010559692978858948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870039056967336, + "compression_loss": 0.0, + "distillation_loss": 0.018740851432085037, + "epoch": 3.95, + "learning_rate": 2.003068146533266e-05, + "loss": 0.0175, + "step": 4158, + "task_loss": 0.00584782287478447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870130407205111, + "compression_loss": 0.0, + "distillation_loss": 0.1747373789548874, + "epoch": 3.95, + "learning_rate": 2.002023929019165e-05, + "loss": 0.1671, + "step": 4159, + "task_loss": 0.09856338798999786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870221714625809, + "compression_loss": 0.0, + "distillation_loss": 0.05840107798576355, + "epoch": 3.95, + "learning_rate": 2.0009798019648163e-05, + "loss": 0.0601, + "step": 4160, + "task_loss": 0.07528725266456604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870312979239464, + "compression_loss": 0.0, + "distillation_loss": 0.052719514816999435, + "epoch": 3.95, + "learning_rate": 1.9999357655598893e-05, + "loss": 0.0555, + "step": 4161, + "task_loss": 0.08041465282440186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870404201056116, + "compression_loss": 0.0, + "distillation_loss": 0.11436031758785248, + "epoch": 3.95, + "learning_rate": 1.9988918199940386e-05, + "loss": 0.1258, + "step": 4162, + "task_loss": 0.22912541031837463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870495380085799, + "compression_loss": 0.0, + "distillation_loss": 0.027497505769133568, + "epoch": 3.95, + "learning_rate": 1.997847965456903e-05, + "loss": 0.0335, + "step": 4163, + "task_loss": 0.08756634593009949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870586516338552, + "compression_loss": 0.0, + "distillation_loss": 0.12368052452802658, + "epoch": 3.95, + "learning_rate": 1.9968042021381023e-05, + "loss": 0.1237, + "step": 4164, + "task_loss": 0.12429526448249817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870677609824411, + "compression_loss": 0.0, + "distillation_loss": 0.03644444793462753, + "epoch": 3.96, + "learning_rate": 1.9957605302272412e-05, + "loss": 0.0463, + "step": 4165, + "task_loss": 0.13482612371444702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870768660553412, + "compression_loss": 0.0, + "distillation_loss": 0.1511649489402771, + "epoch": 3.96, + "learning_rate": 1.9947169499139083e-05, + "loss": 0.1411, + "step": 4166, + "task_loss": 0.05041804164648056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870859668535594, + "compression_loss": 0.0, + "distillation_loss": 0.16785401105880737, + "epoch": 3.96, + "learning_rate": 1.9936734613876762e-05, + "loss": 0.1752, + "step": 4167, + "task_loss": 0.24086761474609375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7870950633780992, + "compression_loss": 0.0, + "distillation_loss": 0.06589843332767487, + "epoch": 3.96, + "learning_rate": 1.9926300648380976e-05, + "loss": 0.0679, + "step": 4168, + "task_loss": 0.08612485975027084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871041556299643, + "compression_loss": 0.0, + "distillation_loss": 0.1752784252166748, + "epoch": 3.96, + "learning_rate": 1.9915867604547106e-05, + "loss": 0.168, + "step": 4169, + "task_loss": 0.10218894481658936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871132436101584, + "compression_loss": 0.0, + "distillation_loss": 0.06247349828481674, + "epoch": 3.96, + "learning_rate": 1.9905435484270383e-05, + "loss": 0.0576, + "step": 4170, + "task_loss": 0.014096638187766075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871223273196852, + "compression_loss": 0.0, + "distillation_loss": 0.032126858830451965, + "epoch": 3.96, + "learning_rate": 1.989500428944583e-05, + "loss": 0.0376, + "step": 4171, + "task_loss": 0.08727513253688812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871314067595484, + "compression_loss": 0.0, + "distillation_loss": 0.032654955983161926, + "epoch": 3.96, + "learning_rate": 1.988457402196834e-05, + "loss": 0.0306, + "step": 4172, + "task_loss": 0.01211773045361042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871404819307517, + "compression_loss": 0.0, + "distillation_loss": 0.018787803128361702, + "epoch": 3.96, + "learning_rate": 1.9874144683732615e-05, + "loss": 0.0173, + "step": 4173, + "task_loss": 0.0037378836423158646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871495528342987, + "compression_loss": 0.0, + "distillation_loss": 0.04686371237039566, + "epoch": 3.96, + "learning_rate": 1.9863716276633186e-05, + "loss": 0.0444, + "step": 4174, + "task_loss": 0.021781545132398605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.787158619471193, + "compression_loss": 0.0, + "distillation_loss": 0.089274100959301, + "epoch": 3.96, + "learning_rate": 1.9853288802564438e-05, + "loss": 0.0895, + "step": 4175, + "task_loss": 0.09111557900905609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871676818424386, + "compression_loss": 0.0, + "distillation_loss": 0.0693182647228241, + "epoch": 3.97, + "learning_rate": 1.9842862263420564e-05, + "loss": 0.0683, + "step": 4176, + "task_loss": 0.05941478908061981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871767399490389, + "compression_loss": 0.0, + "distillation_loss": 0.02392822504043579, + "epoch": 3.97, + "learning_rate": 1.9832436661095604e-05, + "loss": 0.0366, + "step": 4177, + "task_loss": 0.15036530792713165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871857937919977, + "compression_loss": 0.0, + "distillation_loss": 0.07844184339046478, + "epoch": 3.97, + "learning_rate": 1.98220119974834e-05, + "loss": 0.0762, + "step": 4178, + "task_loss": 0.0561903640627861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7871948433723186, + "compression_loss": 0.0, + "distillation_loss": 0.08809831738471985, + "epoch": 3.97, + "learning_rate": 1.9811588274477665e-05, + "loss": 0.0907, + "step": 4179, + "task_loss": 0.11436465382575989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872038886910053, + "compression_loss": 0.0, + "distillation_loss": 0.02092152088880539, + "epoch": 3.97, + "learning_rate": 1.980116549397191e-05, + "loss": 0.0242, + "step": 4180, + "task_loss": 0.05349248647689819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872129297490617, + "compression_loss": 0.0, + "distillation_loss": 0.10855552554130554, + "epoch": 3.97, + "learning_rate": 1.979074365785947e-05, + "loss": 0.104, + "step": 4181, + "task_loss": 0.06298065930604935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872219665474911, + "compression_loss": 0.0, + "distillation_loss": 0.07886943221092224, + "epoch": 3.97, + "learning_rate": 1.978032276803354e-05, + "loss": 0.0749, + "step": 4182, + "task_loss": 0.03924179822206497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872309990872974, + "compression_loss": 0.0, + "distillation_loss": 0.024403858929872513, + "epoch": 3.97, + "learning_rate": 1.9769902826387106e-05, + "loss": 0.0229, + "step": 4183, + "task_loss": 0.008915219455957413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872400273694844, + "compression_loss": 0.0, + "distillation_loss": 0.08649304509162903, + "epoch": 3.97, + "learning_rate": 1.9759483834813023e-05, + "loss": 0.0885, + "step": 4184, + "task_loss": 0.10659407079219818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872490513950555, + "compression_loss": 0.0, + "distillation_loss": 0.030071692541241646, + "epoch": 3.97, + "learning_rate": 1.9749065795203938e-05, + "loss": 0.0468, + "step": 4185, + "task_loss": 0.19697120785713196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872580711650146, + "compression_loss": 0.0, + "distillation_loss": 0.05038078874349594, + "epoch": 3.98, + "learning_rate": 1.9738648709452336e-05, + "loss": 0.0527, + "step": 4186, + "task_loss": 0.07359597086906433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872670866803653, + "compression_loss": 0.0, + "distillation_loss": 0.09490829706192017, + "epoch": 3.98, + "learning_rate": 1.9728232579450543e-05, + "loss": 0.1038, + "step": 4187, + "task_loss": 0.18415942788124084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872760979421113, + "compression_loss": 0.0, + "distillation_loss": 0.1876422017812729, + "epoch": 3.98, + "learning_rate": 1.971781740709068e-05, + "loss": 0.1938, + "step": 4188, + "task_loss": 0.24940979480743408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872851049512563, + "compression_loss": 0.0, + "distillation_loss": 0.021935122087597847, + "epoch": 3.98, + "learning_rate": 1.970740319426474e-05, + "loss": 0.0206, + "step": 4189, + "task_loss": 0.008345887064933777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7872941077088038, + "compression_loss": 0.0, + "distillation_loss": 0.07562540471553802, + "epoch": 3.98, + "learning_rate": 1.9696989942864488e-05, + "loss": 0.0898, + "step": 4190, + "task_loss": 0.21687836945056915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873031062157578, + "compression_loss": 0.0, + "distillation_loss": 0.06813269853591919, + "epoch": 3.98, + "learning_rate": 1.9686577654781546e-05, + "loss": 0.0756, + "step": 4191, + "task_loss": 0.14326757192611694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873121004731217, + "compression_loss": 0.0, + "distillation_loss": 0.034589093178510666, + "epoch": 3.98, + "learning_rate": 1.967616633190737e-05, + "loss": 0.0422, + "step": 4192, + "task_loss": 0.11064015328884125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873210904818994, + "compression_loss": 0.0, + "distillation_loss": 0.020227717235684395, + "epoch": 3.98, + "learning_rate": 1.966575597613322e-05, + "loss": 0.0186, + "step": 4193, + "task_loss": 0.004256442189216614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873300762430945, + "compression_loss": 0.0, + "distillation_loss": 0.10587961226701736, + "epoch": 3.98, + "learning_rate": 1.9655346589350194e-05, + "loss": 0.1102, + "step": 4194, + "task_loss": 0.14888451993465424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873390577577105, + "compression_loss": 0.0, + "distillation_loss": 0.08755764365196228, + "epoch": 3.98, + "learning_rate": 1.964493817344919e-05, + "loss": 0.1018, + "step": 4195, + "task_loss": 0.22955255210399628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873480350267514, + "compression_loss": 0.0, + "distillation_loss": 0.16795973479747772, + "epoch": 3.98, + "learning_rate": 1.9634530730320967e-05, + "loss": 0.1533, + "step": 4196, + "task_loss": 0.02090776339173317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873570080512207, + "compression_loss": 0.0, + "distillation_loss": 0.1484326869249344, + "epoch": 3.99, + "learning_rate": 1.9624124261856068e-05, + "loss": 0.1569, + "step": 4197, + "task_loss": 0.23264384269714355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873659768321221, + "compression_loss": 0.0, + "distillation_loss": 0.07327691465616226, + "epoch": 3.99, + "learning_rate": 1.961371876994489e-05, + "loss": 0.0931, + "step": 4198, + "task_loss": 0.2711363434791565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873749413704593, + "compression_loss": 0.0, + "distillation_loss": 0.04226084426045418, + "epoch": 3.99, + "learning_rate": 1.9603314256477644e-05, + "loss": 0.0453, + "step": 4199, + "task_loss": 0.0727810189127922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873839016672359, + "compression_loss": 0.0, + "distillation_loss": 0.1644660234451294, + "epoch": 3.99, + "learning_rate": 1.9592910723344335e-05, + "loss": 0.1607, + "step": 4200, + "task_loss": 0.12694597244262695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7873928577234557, + "compression_loss": 0.0, + "distillation_loss": 0.145688995718956, + "epoch": 3.99, + "learning_rate": 1.958250817243485e-05, + "loss": 0.1528, + "step": 4201, + "task_loss": 0.2163536101579666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874018095401224, + "compression_loss": 0.0, + "distillation_loss": 0.04844709113240242, + "epoch": 3.99, + "learning_rate": 1.9572106605638842e-05, + "loss": 0.0536, + "step": 4202, + "task_loss": 0.09967800229787827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874107571182396, + "compression_loss": 0.0, + "distillation_loss": 0.019720932468771935, + "epoch": 3.99, + "learning_rate": 1.956170602484582e-05, + "loss": 0.0182, + "step": 4203, + "task_loss": 0.004351753741502762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874197004588109, + "compression_loss": 0.0, + "distillation_loss": 0.10592324286699295, + "epoch": 3.99, + "learning_rate": 1.955130643194508e-05, + "loss": 0.1063, + "step": 4204, + "task_loss": 0.10958631336688995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874286395628401, + "compression_loss": 0.0, + "distillation_loss": 0.05713961645960808, + "epoch": 3.99, + "learning_rate": 1.9540907828825768e-05, + "loss": 0.0603, + "step": 4205, + "task_loss": 0.08864769339561462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.787437574431331, + "compression_loss": 0.0, + "distillation_loss": 0.0976656973361969, + "epoch": 3.99, + "learning_rate": 1.9530510217376843e-05, + "loss": 0.1037, + "step": 4206, + "task_loss": 0.15841828286647797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874465050652871, + "compression_loss": 0.0, + "distillation_loss": 0.13762739300727844, + "epoch": 4.0, + "learning_rate": 1.952011359948708e-05, + "loss": 0.1429, + "step": 4207, + "task_loss": 0.1902618259191513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874554314657122, + "compression_loss": 0.0, + "distillation_loss": 0.047601085156202316, + "epoch": 4.0, + "learning_rate": 1.9509717977045068e-05, + "loss": 0.0439, + "step": 4208, + "task_loss": 0.01073162630200386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874643536336098, + "compression_loss": 0.0, + "distillation_loss": 0.056678276509046555, + "epoch": 4.0, + "learning_rate": 1.949932335193922e-05, + "loss": 0.0562, + "step": 4209, + "task_loss": 0.052305128425359726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874732715699837, + "compression_loss": 0.0, + "distillation_loss": 0.18532370030879974, + "epoch": 4.0, + "learning_rate": 1.948892972605779e-05, + "loss": 0.1904, + "step": 4210, + "task_loss": 0.2363312840461731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874821852758376, + "compression_loss": 0.0, + "distillation_loss": 0.1466386467218399, + "epoch": 4.0, + "learning_rate": 1.9478537101288814e-05, + "loss": 0.1392, + "step": 4211, + "task_loss": 0.07213930040597916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, + "compression/magnitude_sparsity/target_sparsity_level": 0.7874910947521752, + "compression_loss": 0.0, + "distillation_loss": 0.078835628926754, + "epoch": 4.0, + "learning_rate": 1.946814547952016e-05, + "loss": 0.0736, + "step": 4212, + "task_loss": 0.02693953923881054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875000000000001, + "compression_loss": 0.0, + "distillation_loss": 0.4135439693927765, + "epoch": 4.0, + "learning_rate": 1.945775486263953e-05, + "loss": 0.3929, + "step": 4213, + "task_loss": 0.20665842294692993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787508901020316, + "compression_loss": 0.0, + "distillation_loss": 0.32134756445884705, + "epoch": 4.0, + "learning_rate": 1.9447365252534414e-05, + "loss": 0.2996, + "step": 4214, + "task_loss": 0.10436099767684937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875177978141266, + "compression_loss": 0.0, + "distillation_loss": 0.3447112441062927, + "epoch": 4.0, + "learning_rate": 1.9436976651092144e-05, + "loss": 0.3183, + "step": 4215, + "task_loss": 0.08028891682624817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875266903824356, + "compression_loss": 0.0, + "distillation_loss": 0.2967658042907715, + "epoch": 4.0, + "learning_rate": 1.942658906019986e-05, + "loss": 0.2787, + "step": 4216, + "task_loss": 0.11631828546524048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875355787262467, + "compression_loss": 0.0, + "distillation_loss": 0.3276255130767822, + "epoch": 4.0, + "learning_rate": 1.9416202481744504e-05, + "loss": 0.3027, + "step": 4217, + "task_loss": 0.07845309376716614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875444628465635, + "compression_loss": 0.0, + "distillation_loss": 0.3934395909309387, + "epoch": 4.01, + "learning_rate": 1.940581691761287e-05, + "loss": 0.3804, + "step": 4218, + "task_loss": 0.2632703185081482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875533427443898, + "compression_loss": 0.0, + "distillation_loss": 0.3027340769767761, + "epoch": 4.01, + "learning_rate": 1.9395432369691526e-05, + "loss": 0.2816, + "step": 4219, + "task_loss": 0.09113991260528564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875622184207293, + "compression_loss": 0.0, + "distillation_loss": 0.30732953548431396, + "epoch": 4.01, + "learning_rate": 1.9385048839866896e-05, + "loss": 0.2857, + "step": 4220, + "task_loss": 0.09076324105262756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875710898765854, + "compression_loss": 0.0, + "distillation_loss": 0.27244919538497925, + "epoch": 4.01, + "learning_rate": 1.9374666330025178e-05, + "loss": 0.26, + "step": 4221, + "task_loss": 0.14839491248130798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875799571129621, + "compression_loss": 0.0, + "distillation_loss": 0.13425913453102112, + "epoch": 4.01, + "learning_rate": 1.9364284842052414e-05, + "loss": 0.1228, + "step": 4222, + "task_loss": 0.019507169723510742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787588820130863, + "compression_loss": 0.0, + "distillation_loss": 0.17389854788780212, + "epoch": 4.01, + "learning_rate": 1.9353904377834454e-05, + "loss": 0.1588, + "step": 4223, + "task_loss": 0.0224696546792984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7875976789312916, + "compression_loss": 0.0, + "distillation_loss": 0.16582781076431274, + "epoch": 4.01, + "learning_rate": 1.934352493925695e-05, + "loss": 0.1547, + "step": 4224, + "task_loss": 0.05480484664440155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876065335152518, + "compression_loss": 0.0, + "distillation_loss": 0.09588154405355453, + "epoch": 4.01, + "learning_rate": 1.933314652820539e-05, + "loss": 0.0882, + "step": 4225, + "task_loss": 0.019207999110221863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876153838837472, + "compression_loss": 0.0, + "distillation_loss": 0.11465960741043091, + "epoch": 4.01, + "learning_rate": 1.932276914656504e-05, + "loss": 0.106, + "step": 4226, + "task_loss": 0.028103739023208618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876242300377816, + "compression_loss": 0.0, + "distillation_loss": 0.22152969241142273, + "epoch": 4.01, + "learning_rate": 1.9312392796221033e-05, + "loss": 0.217, + "step": 4227, + "task_loss": 0.1767098605632782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876330719783584, + "compression_loss": 0.0, + "distillation_loss": 0.17856980860233307, + "epoch": 4.02, + "learning_rate": 1.9302017479058256e-05, + "loss": 0.1757, + "step": 4228, + "task_loss": 0.149837926030159 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876419097064816, + "compression_loss": 0.0, + "distillation_loss": 0.08909793198108673, + "epoch": 4.02, + "learning_rate": 1.929164319696145e-05, + "loss": 0.09, + "step": 4229, + "task_loss": 0.09795405715703964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876507432231546, + "compression_loss": 0.0, + "distillation_loss": 0.11065828800201416, + "epoch": 4.02, + "learning_rate": 1.9281269951815154e-05, + "loss": 0.1068, + "step": 4230, + "task_loss": 0.07157891988754272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876595725293813, + "compression_loss": 0.0, + "distillation_loss": 0.1124470978975296, + "epoch": 4.02, + "learning_rate": 1.9270897745503706e-05, + "loss": 0.107, + "step": 4231, + "task_loss": 0.05780310928821564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876683976261654, + "compression_loss": 0.0, + "distillation_loss": 0.09072036296129227, + "epoch": 4.02, + "learning_rate": 1.9260526579911283e-05, + "loss": 0.0887, + "step": 4232, + "task_loss": 0.07040438801050186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7876772185145103, + "compression_loss": 0.0, + "distillation_loss": 0.1262628436088562, + "epoch": 4.02, + "learning_rate": 1.9250156456921837e-05, + "loss": 0.1201, + "step": 4233, + "task_loss": 0.06450807303190231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.78768603519542, + "compression_loss": 0.0, + "distillation_loss": 0.062325261533260345, + "epoch": 4.02, + "learning_rate": 1.9239787378419165e-05, + "loss": 0.0646, + "step": 4234, + "task_loss": 0.08547300100326538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787694847669898, + "compression_loss": 0.0, + "distillation_loss": 0.054038092494010925, + "epoch": 4.02, + "learning_rate": 1.9229419346286853e-05, + "loss": 0.0545, + "step": 4235, + "task_loss": 0.05840130150318146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787703655938948, + "compression_loss": 0.0, + "distillation_loss": 0.11831286549568176, + "epoch": 4.02, + "learning_rate": 1.9219052362408314e-05, + "loss": 0.1204, + "step": 4236, + "task_loss": 0.13891497254371643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877124600035738, + "compression_loss": 0.0, + "distillation_loss": 0.13940417766571045, + "epoch": 4.02, + "learning_rate": 1.920868642866676e-05, + "loss": 0.1331, + "step": 4237, + "task_loss": 0.07673609256744385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877212598647789, + "compression_loss": 0.0, + "distillation_loss": 0.11010223627090454, + "epoch": 4.02, + "learning_rate": 1.91983215469452e-05, + "loss": 0.1152, + "step": 4238, + "task_loss": 0.16106702387332916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877300555235671, + "compression_loss": 0.0, + "distillation_loss": 0.18333761394023895, + "epoch": 4.03, + "learning_rate": 1.918795771912648e-05, + "loss": 0.1795, + "step": 4239, + "task_loss": 0.14447200298309326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877388469809421, + "compression_loss": 0.0, + "distillation_loss": 0.08234288543462753, + "epoch": 4.03, + "learning_rate": 1.917759494709322e-05, + "loss": 0.0861, + "step": 4240, + "task_loss": 0.12013165652751923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877476342379075, + "compression_loss": 0.0, + "distillation_loss": 0.134935200214386, + "epoch": 4.03, + "learning_rate": 1.9167233232727885e-05, + "loss": 0.1334, + "step": 4241, + "task_loss": 0.11945871263742447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787756417295467, + "compression_loss": 0.0, + "distillation_loss": 0.053517088294029236, + "epoch": 4.03, + "learning_rate": 1.915687257791273e-05, + "loss": 0.0582, + "step": 4242, + "task_loss": 0.10031777620315552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877651961546244, + "compression_loss": 0.0, + "distillation_loss": 0.1066487580537796, + "epoch": 4.03, + "learning_rate": 1.9146512984529793e-05, + "loss": 0.1059, + "step": 4243, + "task_loss": 0.09886283427476883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877739708163831, + "compression_loss": 0.0, + "distillation_loss": 0.23031002283096313, + "epoch": 4.03, + "learning_rate": 1.913615445446098e-05, + "loss": 0.2218, + "step": 4244, + "task_loss": 0.14517052471637726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7877827412817472, + "compression_loss": 0.0, + "distillation_loss": 0.2954586148262024, + "epoch": 4.03, + "learning_rate": 1.9125796989587947e-05, + "loss": 0.3012, + "step": 4245, + "task_loss": 0.35336050391197205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.78779150755172, + "compression_loss": 0.0, + "distillation_loss": 0.0650944635272026, + "epoch": 4.03, + "learning_rate": 1.9115440591792182e-05, + "loss": 0.063, + "step": 4246, + "task_loss": 0.044294241815805435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878002696273054, + "compression_loss": 0.0, + "distillation_loss": 0.070266954600811, + "epoch": 4.03, + "learning_rate": 1.9105085262954975e-05, + "loss": 0.0739, + "step": 4247, + "task_loss": 0.10610578209161758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787809027509507, + "compression_loss": 0.0, + "distillation_loss": 0.09126845002174377, + "epoch": 4.03, + "learning_rate": 1.9094731004957416e-05, + "loss": 0.0977, + "step": 4248, + "task_loss": 0.15584436058998108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878177811993284, + "compression_loss": 0.0, + "distillation_loss": 0.1306382119655609, + "epoch": 4.04, + "learning_rate": 1.9084377819680417e-05, + "loss": 0.1219, + "step": 4249, + "task_loss": 0.043007947504520416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878265306977735, + "compression_loss": 0.0, + "distillation_loss": 0.06510676443576813, + "epoch": 4.04, + "learning_rate": 1.9074025709004672e-05, + "loss": 0.0593, + "step": 4250, + "task_loss": 0.007219014689326286 + }, + { + "epoch": 4.04, + "eval_accuracy": 0.8910550458715596, + "eval_loss": 0.53865647315979, + "eval_runtime": 17.9464, + "eval_samples_per_second": 48.589, + "eval_steps_per_second": 6.074, + "step": 4250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878352760058459, + "compression_loss": 0.0, + "distillation_loss": 0.07825392484664917, + "epoch": 4.04, + "learning_rate": 1.9063674674810696e-05, + "loss": 0.0739, + "step": 4251, + "task_loss": 0.035188619047403336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878440171245491, + "compression_loss": 0.0, + "distillation_loss": 0.1569930762052536, + "epoch": 4.04, + "learning_rate": 1.90533247189788e-05, + "loss": 0.1545, + "step": 4252, + "task_loss": 0.1325351744890213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878527540548871, + "compression_loss": 0.0, + "distillation_loss": 0.0776907354593277, + "epoch": 4.04, + "learning_rate": 1.9042975843389115e-05, + "loss": 0.0752, + "step": 4253, + "task_loss": 0.05311368405818939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878614867978633, + "compression_loss": 0.0, + "distillation_loss": 0.02283748984336853, + "epoch": 4.04, + "learning_rate": 1.903262804992156e-05, + "loss": 0.0229, + "step": 4254, + "task_loss": 0.023530958220362663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878702153544814, + "compression_loss": 0.0, + "distillation_loss": 0.35799717903137207, + "epoch": 4.04, + "learning_rate": 1.9022281340455854e-05, + "loss": 0.3416, + "step": 4255, + "task_loss": 0.1938759684562683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878789397257453, + "compression_loss": 0.0, + "distillation_loss": 0.08360302448272705, + "epoch": 4.04, + "learning_rate": 1.9011935716871535e-05, + "loss": 0.0995, + "step": 4256, + "task_loss": 0.24218979477882385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878876599126585, + "compression_loss": 0.0, + "distillation_loss": 0.060659270733594894, + "epoch": 4.04, + "learning_rate": 1.900159118104793e-05, + "loss": 0.0644, + "step": 4257, + "task_loss": 0.09822718799114227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7878963759162247, + "compression_loss": 0.0, + "distillation_loss": 0.10400435328483582, + "epoch": 4.04, + "learning_rate": 1.8991247734864173e-05, + "loss": 0.1054, + "step": 4258, + "task_loss": 0.11802786588668823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879050877374478, + "compression_loss": 0.0, + "distillation_loss": 0.12243454158306122, + "epoch": 4.04, + "learning_rate": 1.898090538019921e-05, + "loss": 0.1253, + "step": 4259, + "task_loss": 0.15127399563789368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879137953773312, + "compression_loss": 0.0, + "distillation_loss": 0.04609953239560127, + "epoch": 4.05, + "learning_rate": 1.897056411893177e-05, + "loss": 0.0505, + "step": 4260, + "task_loss": 0.08963650465011597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879224988368786, + "compression_loss": 0.0, + "distillation_loss": 0.02292802557349205, + "epoch": 4.05, + "learning_rate": 1.896022395294039e-05, + "loss": 0.0211, + "step": 4261, + "task_loss": 0.004292329773306847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879311981170939, + "compression_loss": 0.0, + "distillation_loss": 0.037770919501781464, + "epoch": 4.05, + "learning_rate": 1.8949884884103418e-05, + "loss": 0.0359, + "step": 4262, + "task_loss": 0.019271956756711006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879398932189805, + "compression_loss": 0.0, + "distillation_loss": 0.16756854951381683, + "epoch": 4.05, + "learning_rate": 1.8939546914299e-05, + "loss": 0.171, + "step": 4263, + "task_loss": 0.20192977786064148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879485841435423, + "compression_loss": 0.0, + "distillation_loss": 0.07206468284130096, + "epoch": 4.05, + "learning_rate": 1.892921004540507e-05, + "loss": 0.0677, + "step": 4264, + "task_loss": 0.028736798092722893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879572708917829, + "compression_loss": 0.0, + "distillation_loss": 0.0855240449309349, + "epoch": 4.05, + "learning_rate": 1.8918874279299372e-05, + "loss": 0.0786, + "step": 4265, + "task_loss": 0.01638454757630825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.787965953464706, + "compression_loss": 0.0, + "distillation_loss": 0.06442791223526001, + "epoch": 4.05, + "learning_rate": 1.8908539617859456e-05, + "loss": 0.0701, + "step": 4266, + "task_loss": 0.12096739560365677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879746318633153, + "compression_loss": 0.0, + "distillation_loss": 0.07881754636764526, + "epoch": 4.05, + "learning_rate": 1.8898206062962647e-05, + "loss": 0.0732, + "step": 4267, + "task_loss": 0.022822581231594086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879833060886144, + "compression_loss": 0.0, + "distillation_loss": 0.09095026552677155, + "epoch": 4.05, + "learning_rate": 1.88878736164861e-05, + "loss": 0.0839, + "step": 4268, + "task_loss": 0.020265545696020126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7879919761416071, + "compression_loss": 0.0, + "distillation_loss": 0.19944968819618225, + "epoch": 4.05, + "learning_rate": 1.8877542280306728e-05, + "loss": 0.2095, + "step": 4269, + "task_loss": 0.2999950647354126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788000642023297, + "compression_loss": 0.0, + "distillation_loss": 0.08276577293872833, + "epoch": 4.06, + "learning_rate": 1.8867212056301305e-05, + "loss": 0.0792, + "step": 4270, + "task_loss": 0.04671813175082207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880093037346878, + "compression_loss": 0.0, + "distillation_loss": 0.21997350454330444, + "epoch": 4.06, + "learning_rate": 1.8856882946346344e-05, + "loss": 0.2087, + "step": 4271, + "task_loss": 0.10748877376317978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880179612767833, + "compression_loss": 0.0, + "distillation_loss": 0.14946487545967102, + "epoch": 4.06, + "learning_rate": 1.8846554952318178e-05, + "loss": 0.1507, + "step": 4272, + "task_loss": 0.1618417650461197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880266146505869, + "compression_loss": 0.0, + "distillation_loss": 0.25104832649230957, + "epoch": 4.06, + "learning_rate": 1.8836228076092945e-05, + "loss": 0.2645, + "step": 4273, + "task_loss": 0.3852643370628357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880352638571025, + "compression_loss": 0.0, + "distillation_loss": 0.01792140305042267, + "epoch": 4.06, + "learning_rate": 1.8825902319546565e-05, + "loss": 0.0165, + "step": 4274, + "task_loss": 0.0033651478588581085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880439088973338, + "compression_loss": 0.0, + "distillation_loss": 0.051164254546165466, + "epoch": 4.06, + "learning_rate": 1.881557768455477e-05, + "loss": 0.0474, + "step": 4275, + "task_loss": 0.013138506561517715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880525497722843, + "compression_loss": 0.0, + "distillation_loss": 0.1809595227241516, + "epoch": 4.06, + "learning_rate": 1.8805254172993064e-05, + "loss": 0.1699, + "step": 4276, + "task_loss": 0.07031507790088654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880611864829579, + "compression_loss": 0.0, + "distillation_loss": 0.09073644876480103, + "epoch": 4.06, + "learning_rate": 1.879493178673677e-05, + "loss": 0.0901, + "step": 4277, + "task_loss": 0.08396268635988235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880698190303581, + "compression_loss": 0.0, + "distillation_loss": 0.07403849810361862, + "epoch": 4.06, + "learning_rate": 1.8784610527661e-05, + "loss": 0.0697, + "step": 4278, + "task_loss": 0.03097301721572876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880784474154886, + "compression_loss": 0.0, + "distillation_loss": 0.10361620783805847, + "epoch": 4.06, + "learning_rate": 1.8774290397640664e-05, + "loss": 0.1067, + "step": 4279, + "task_loss": 0.13399073481559753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880870716393533, + "compression_loss": 0.0, + "distillation_loss": 0.04671156406402588, + "epoch": 4.06, + "learning_rate": 1.876397139855047e-05, + "loss": 0.0516, + "step": 4280, + "task_loss": 0.09573487192392349 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7880956917029556, + "compression_loss": 0.0, + "distillation_loss": 0.06411126255989075, + "epoch": 4.07, + "learning_rate": 1.8753653532264894e-05, + "loss": 0.0698, + "step": 4281, + "task_loss": 0.12079112231731415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881043076072994, + "compression_loss": 0.0, + "distillation_loss": 0.10022042691707611, + "epoch": 4.07, + "learning_rate": 1.8743336800658245e-05, + "loss": 0.1016, + "step": 4282, + "task_loss": 0.11449723690748215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881129193533882, + "compression_loss": 0.0, + "distillation_loss": 0.13063356280326843, + "epoch": 4.07, + "learning_rate": 1.8733021205604596e-05, + "loss": 0.129, + "step": 4283, + "task_loss": 0.11380869895219803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881215269422258, + "compression_loss": 0.0, + "distillation_loss": 0.171952024102211, + "epoch": 4.07, + "learning_rate": 1.872270674897782e-05, + "loss": 0.1669, + "step": 4284, + "task_loss": 0.12174376845359802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881301303748159, + "compression_loss": 0.0, + "distillation_loss": 0.02115825191140175, + "epoch": 4.07, + "learning_rate": 1.8712393432651603e-05, + "loss": 0.0195, + "step": 4285, + "task_loss": 0.0041760653257369995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881387296521621, + "compression_loss": 0.0, + "distillation_loss": 0.03141398727893829, + "epoch": 4.07, + "learning_rate": 1.87020812584994e-05, + "loss": 0.0287, + "step": 4286, + "task_loss": 0.0046629346907138824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881473247752682, + "compression_loss": 0.0, + "distillation_loss": 0.10433746874332428, + "epoch": 4.07, + "learning_rate": 1.8691770228394456e-05, + "loss": 0.1159, + "step": 4287, + "task_loss": 0.21988573670387268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881559157451377, + "compression_loss": 0.0, + "distillation_loss": 0.08133610337972641, + "epoch": 4.07, + "learning_rate": 1.868146034420984e-05, + "loss": 0.0763, + "step": 4288, + "task_loss": 0.03072592243552208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881645025627744, + "compression_loss": 0.0, + "distillation_loss": 0.11363761126995087, + "epoch": 4.07, + "learning_rate": 1.8671151607818382e-05, + "loss": 0.1087, + "step": 4289, + "task_loss": 0.06467755138874054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788173085229182, + "compression_loss": 0.0, + "distillation_loss": 0.051334887742996216, + "epoch": 4.07, + "learning_rate": 1.8660844021092716e-05, + "loss": 0.0478, + "step": 4290, + "task_loss": 0.016054822131991386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788181663745364, + "compression_loss": 0.0, + "distillation_loss": 0.08099980652332306, + "epoch": 4.08, + "learning_rate": 1.8650537585905258e-05, + "loss": 0.0847, + "step": 4291, + "task_loss": 0.11772287636995316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881902381123244, + "compression_loss": 0.0, + "distillation_loss": 0.015967125073075294, + "epoch": 4.08, + "learning_rate": 1.8640232304128236e-05, + "loss": 0.0146, + "step": 4292, + "task_loss": 0.002349233254790306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7881988083310666, + "compression_loss": 0.0, + "distillation_loss": 0.03628503531217575, + "epoch": 4.08, + "learning_rate": 1.8629928177633637e-05, + "loss": 0.0488, + "step": 4293, + "task_loss": 0.16185718774795532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882073744025945, + "compression_loss": 0.0, + "distillation_loss": 0.05933556705713272, + "epoch": 4.08, + "learning_rate": 1.8619625208293268e-05, + "loss": 0.0568, + "step": 4294, + "task_loss": 0.03381485491991043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882159363279116, + "compression_loss": 0.0, + "distillation_loss": 0.16856390237808228, + "epoch": 4.08, + "learning_rate": 1.86093233979787e-05, + "loss": 0.1643, + "step": 4295, + "task_loss": 0.12632951140403748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882244941080216, + "compression_loss": 0.0, + "distillation_loss": 0.07362917810678482, + "epoch": 4.08, + "learning_rate": 1.8599022748561325e-05, + "loss": 0.075, + "step": 4296, + "task_loss": 0.08769040554761887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882330477439283, + "compression_loss": 0.0, + "distillation_loss": 0.03499433398246765, + "epoch": 4.08, + "learning_rate": 1.8588723261912288e-05, + "loss": 0.0327, + "step": 4297, + "task_loss": 0.012281343340873718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882415972366354, + "compression_loss": 0.0, + "distillation_loss": 0.11919771134853363, + "epoch": 4.08, + "learning_rate": 1.857842493990255e-05, + "loss": 0.1097, + "step": 4298, + "task_loss": 0.023989982903003693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882501425871464, + "compression_loss": 0.0, + "distillation_loss": 0.11720117926597595, + "epoch": 4.08, + "learning_rate": 1.856812778440285e-05, + "loss": 0.1113, + "step": 4299, + "task_loss": 0.057778194546699524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882586837964651, + "compression_loss": 0.0, + "distillation_loss": 0.24679766595363617, + "epoch": 4.08, + "learning_rate": 1.8557831797283716e-05, + "loss": 0.2454, + "step": 4300, + "task_loss": 0.23254123330116272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882672208655952, + "compression_loss": 0.0, + "distillation_loss": 0.05673684924840927, + "epoch": 4.08, + "learning_rate": 1.8547536980415452e-05, + "loss": 0.0526, + "step": 4301, + "task_loss": 0.014921434223651886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882757537955404, + "compression_loss": 0.0, + "distillation_loss": 0.06865353882312775, + "epoch": 4.09, + "learning_rate": 1.8537243335668187e-05, + "loss": 0.0635, + "step": 4302, + "task_loss": 0.01695919781923294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882842825873042, + "compression_loss": 0.0, + "distillation_loss": 0.08616451919078827, + "epoch": 4.09, + "learning_rate": 1.8526950864911784e-05, + "loss": 0.0925, + "step": 4303, + "task_loss": 0.14940661191940308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7882928072418904, + "compression_loss": 0.0, + "distillation_loss": 0.0653180330991745, + "epoch": 4.09, + "learning_rate": 1.8516659570015924e-05, + "loss": 0.0677, + "step": 4304, + "task_loss": 0.08950569480657578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883013277603028, + "compression_loss": 0.0, + "distillation_loss": 0.01897302269935608, + "epoch": 4.09, + "learning_rate": 1.8506369452850087e-05, + "loss": 0.0177, + "step": 4305, + "task_loss": 0.0065006837248802185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788309844143545, + "compression_loss": 0.0, + "distillation_loss": 0.0725211426615715, + "epoch": 4.09, + "learning_rate": 1.8496080515283514e-05, + "loss": 0.0802, + "step": 4306, + "task_loss": 0.14979489147663116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883183563926206, + "compression_loss": 0.0, + "distillation_loss": 0.03266327083110809, + "epoch": 4.09, + "learning_rate": 1.8485792759185232e-05, + "loss": 0.0298, + "step": 4307, + "task_loss": 0.0037787500768899918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883268645085333, + "compression_loss": 0.0, + "distillation_loss": 0.02446456253528595, + "epoch": 4.09, + "learning_rate": 1.8475506186424074e-05, + "loss": 0.0224, + "step": 4308, + "task_loss": 0.004015939310193062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883353684922869, + "compression_loss": 0.0, + "distillation_loss": 0.029062699526548386, + "epoch": 4.09, + "learning_rate": 1.846522079886864e-05, + "loss": 0.0269, + "step": 4309, + "task_loss": 0.007744431495666504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788343868344885, + "compression_loss": 0.0, + "distillation_loss": 0.026423152536153793, + "epoch": 4.09, + "learning_rate": 1.8454936598387317e-05, + "loss": 0.0375, + "step": 4310, + "task_loss": 0.13689905405044556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883523640673312, + "compression_loss": 0.0, + "distillation_loss": 0.18082647025585175, + "epoch": 4.09, + "learning_rate": 1.8444653586848286e-05, + "loss": 0.1714, + "step": 4311, + "task_loss": 0.08704258501529694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883608556606293, + "compression_loss": 0.0, + "distillation_loss": 0.06727072596549988, + "epoch": 4.09, + "learning_rate": 1.8434371766119496e-05, + "loss": 0.0641, + "step": 4312, + "task_loss": 0.03516186401247978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788369343125783, + "compression_loss": 0.0, + "distillation_loss": 0.02708693966269493, + "epoch": 4.1, + "learning_rate": 1.8424091138068692e-05, + "loss": 0.0254, + "step": 4313, + "task_loss": 0.009996037930250168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788377826463796, + "compression_loss": 0.0, + "distillation_loss": 0.036099981516599655, + "epoch": 4.1, + "learning_rate": 1.8413811704563405e-05, + "loss": 0.0384, + "step": 4314, + "task_loss": 0.059546999633312225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883863056756718, + "compression_loss": 0.0, + "distillation_loss": 0.048894353210926056, + "epoch": 4.1, + "learning_rate": 1.8403533467470946e-05, + "loss": 0.0634, + "step": 4315, + "task_loss": 0.1943952441215515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7883947807624142, + "compression_loss": 0.0, + "distillation_loss": 0.1497052013874054, + "epoch": 4.1, + "learning_rate": 1.8393256428658403e-05, + "loss": 0.1447, + "step": 4316, + "task_loss": 0.0995352566242218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884032517250269, + "compression_loss": 0.0, + "distillation_loss": 0.07365826517343521, + "epoch": 4.1, + "learning_rate": 1.8382980589992643e-05, + "loss": 0.0695, + "step": 4317, + "task_loss": 0.031793296337127686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884117185645135, + "compression_loss": 0.0, + "distillation_loss": 0.058896757662296295, + "epoch": 4.1, + "learning_rate": 1.8372705953340337e-05, + "loss": 0.0537, + "step": 4318, + "task_loss": 0.006742917001247406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884201812818779, + "compression_loss": 0.0, + "distillation_loss": 0.057655736804008484, + "epoch": 4.1, + "learning_rate": 1.8362432520567903e-05, + "loss": 0.0562, + "step": 4319, + "task_loss": 0.04272696375846863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884286398781235, + "compression_loss": 0.0, + "distillation_loss": 0.06040318310260773, + "epoch": 4.1, + "learning_rate": 1.8352160293541566e-05, + "loss": 0.0561, + "step": 4320, + "task_loss": 0.017850998789072037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884370943542541, + "compression_loss": 0.0, + "distillation_loss": 0.028085466474294662, + "epoch": 4.1, + "learning_rate": 1.834188927412732e-05, + "loss": 0.0255, + "step": 4321, + "task_loss": 0.002726750448346138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884455447112735, + "compression_loss": 0.0, + "distillation_loss": 0.014654599130153656, + "epoch": 4.1, + "learning_rate": 1.833161946419097e-05, + "loss": 0.0203, + "step": 4322, + "task_loss": 0.07066480070352554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884539909501852, + "compression_loss": 0.0, + "distillation_loss": 0.14747686684131622, + "epoch": 4.11, + "learning_rate": 1.8321350865598057e-05, + "loss": 0.1489, + "step": 4323, + "task_loss": 0.16213619709014893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884624330719929, + "compression_loss": 0.0, + "distillation_loss": 0.030887076631188393, + "epoch": 4.11, + "learning_rate": 1.831108348021392e-05, + "loss": 0.0283, + "step": 4324, + "task_loss": 0.005010116845369339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884708710777004, + "compression_loss": 0.0, + "distillation_loss": 0.056699033826589584, + "epoch": 4.11, + "learning_rate": 1.8300817309903686e-05, + "loss": 0.0685, + "step": 4325, + "task_loss": 0.17428170144557953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884793049683114, + "compression_loss": 0.0, + "distillation_loss": 0.03099614754319191, + "epoch": 4.11, + "learning_rate": 1.8290552356532247e-05, + "loss": 0.0287, + "step": 4326, + "task_loss": 0.008489016443490982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884877347448294, + "compression_loss": 0.0, + "distillation_loss": 0.04078204557299614, + "epoch": 4.11, + "learning_rate": 1.8280288621964288e-05, + "loss": 0.0456, + "step": 4327, + "task_loss": 0.08923979848623276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7884961604082582, + "compression_loss": 0.0, + "distillation_loss": 0.0556565560400486, + "epoch": 4.11, + "learning_rate": 1.827002610806427e-05, + "loss": 0.0541, + "step": 4328, + "task_loss": 0.03984564542770386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885045819596015, + "compression_loss": 0.0, + "distillation_loss": 0.06724396347999573, + "epoch": 4.11, + "learning_rate": 1.825976481669641e-05, + "loss": 0.0626, + "step": 4329, + "task_loss": 0.02046894282102585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885129993998629, + "compression_loss": 0.0, + "distillation_loss": 0.034551121294498444, + "epoch": 4.11, + "learning_rate": 1.824950474972473e-05, + "loss": 0.0469, + "step": 4330, + "task_loss": 0.15822812914848328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885214127300461, + "compression_loss": 0.0, + "distillation_loss": 0.03939036652445793, + "epoch": 4.11, + "learning_rate": 1.823924590901303e-05, + "loss": 0.0436, + "step": 4331, + "task_loss": 0.08171267807483673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885298219511548, + "compression_loss": 0.0, + "distillation_loss": 0.046313732862472534, + "epoch": 4.11, + "learning_rate": 1.8228988296424877e-05, + "loss": 0.0426, + "step": 4332, + "task_loss": 0.009343873709440231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885382270641927, + "compression_loss": 0.0, + "distillation_loss": 0.16361156105995178, + "epoch": 4.11, + "learning_rate": 1.82187319138236e-05, + "loss": 0.1613, + "step": 4333, + "task_loss": 0.14066770672798157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885466280701636, + "compression_loss": 0.0, + "distillation_loss": 0.12138652801513672, + "epoch": 4.12, + "learning_rate": 1.8208476763072332e-05, + "loss": 0.1152, + "step": 4334, + "task_loss": 0.05983586981892586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788555024970071, + "compression_loss": 0.0, + "distillation_loss": 0.05892729386687279, + "epoch": 4.12, + "learning_rate": 1.8198222846033975e-05, + "loss": 0.0612, + "step": 4335, + "task_loss": 0.0814908966422081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885634177649186, + "compression_loss": 0.0, + "distillation_loss": 0.01657470129430294, + "epoch": 4.12, + "learning_rate": 1.8187970164571187e-05, + "loss": 0.0158, + "step": 4336, + "task_loss": 0.008594617247581482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885718064557101, + "compression_loss": 0.0, + "distillation_loss": 0.16905143857002258, + "epoch": 4.12, + "learning_rate": 1.817771872054643e-05, + "loss": 0.16, + "step": 4337, + "task_loss": 0.07822901755571365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885801910434493, + "compression_loss": 0.0, + "distillation_loss": 0.12039444595575333, + "epoch": 4.12, + "learning_rate": 1.8167468515821924e-05, + "loss": 0.1294, + "step": 4338, + "task_loss": 0.210740864276886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885885715291397, + "compression_loss": 0.0, + "distillation_loss": 0.320880651473999, + "epoch": 4.12, + "learning_rate": 1.815721955225966e-05, + "loss": 0.3213, + "step": 4339, + "task_loss": 0.32472920417785645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7885969479137851, + "compression_loss": 0.0, + "distillation_loss": 0.11701447516679764, + "epoch": 4.12, + "learning_rate": 1.8146971831721426e-05, + "loss": 0.1076, + "step": 4340, + "task_loss": 0.023150721564888954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886053201983891, + "compression_loss": 0.0, + "distillation_loss": 0.05050637945532799, + "epoch": 4.12, + "learning_rate": 1.8136725356068762e-05, + "loss": 0.0468, + "step": 4341, + "task_loss": 0.013836957514286041 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886136883839555, + "compression_loss": 0.0, + "distillation_loss": 0.14656773209571838, + "epoch": 4.12, + "learning_rate": 1.8126480127163e-05, + "loss": 0.1385, + "step": 4342, + "task_loss": 0.06583775579929352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788622052471488, + "compression_loss": 0.0, + "distillation_loss": 0.17784056067466736, + "epoch": 4.12, + "learning_rate": 1.8116236146865213e-05, + "loss": 0.1849, + "step": 4343, + "task_loss": 0.24860072135925293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886304124619901, + "compression_loss": 0.0, + "distillation_loss": 0.03818412870168686, + "epoch": 4.13, + "learning_rate": 1.810599341703629e-05, + "loss": 0.0394, + "step": 4344, + "task_loss": 0.0502682588994503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886387683564655, + "compression_loss": 0.0, + "distillation_loss": 0.1047634482383728, + "epoch": 4.13, + "learning_rate": 1.8095751939536866e-05, + "loss": 0.0986, + "step": 4345, + "task_loss": 0.04341891035437584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788647120155918, + "compression_loss": 0.0, + "distillation_loss": 0.26263827085494995, + "epoch": 4.13, + "learning_rate": 1.8085511716227345e-05, + "loss": 0.255, + "step": 4346, + "task_loss": 0.18654467165470123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886554678613513, + "compression_loss": 0.0, + "distillation_loss": 0.09224887192249298, + "epoch": 4.13, + "learning_rate": 1.807527274896792e-05, + "loss": 0.0892, + "step": 4347, + "task_loss": 0.06142377480864525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788663811473769, + "compression_loss": 0.0, + "distillation_loss": 0.12528349459171295, + "epoch": 4.13, + "learning_rate": 1.8065035039618556e-05, + "loss": 0.1222, + "step": 4348, + "task_loss": 0.09476065635681152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886721509941748, + "compression_loss": 0.0, + "distillation_loss": 0.04307865723967552, + "epoch": 4.13, + "learning_rate": 1.8054798590038984e-05, + "loss": 0.0394, + "step": 4349, + "task_loss": 0.006099509075284004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886804864235724, + "compression_loss": 0.0, + "distillation_loss": 0.046839505434036255, + "epoch": 4.13, + "learning_rate": 1.8044563402088684e-05, + "loss": 0.0433, + "step": 4350, + "task_loss": 0.01109330728650093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886888177629654, + "compression_loss": 0.0, + "distillation_loss": 0.03697451949119568, + "epoch": 4.13, + "learning_rate": 1.8034329477626945e-05, + "loss": 0.0493, + "step": 4351, + "task_loss": 0.16054922342300415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7886971450133576, + "compression_loss": 0.0, + "distillation_loss": 0.066026471555233, + "epoch": 4.13, + "learning_rate": 1.8024096818512807e-05, + "loss": 0.0672, + "step": 4352, + "task_loss": 0.07799084484577179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887054681757527, + "compression_loss": 0.0, + "distillation_loss": 0.047900401055812836, + "epoch": 4.13, + "learning_rate": 1.8013865426605076e-05, + "loss": 0.0578, + "step": 4353, + "task_loss": 0.14682576060295105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887137872511543, + "compression_loss": 0.0, + "distillation_loss": 0.04553601145744324, + "epoch": 4.13, + "learning_rate": 1.8003635303762336e-05, + "loss": 0.0483, + "step": 4354, + "task_loss": 0.0731407031416893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788722102240566, + "compression_loss": 0.0, + "distillation_loss": 0.07538627833127975, + "epoch": 4.14, + "learning_rate": 1.7993406451842935e-05, + "loss": 0.0774, + "step": 4355, + "task_loss": 0.0952514261007309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887304131449917, + "compression_loss": 0.0, + "distillation_loss": 0.2628774642944336, + "epoch": 4.14, + "learning_rate": 1.7983178872704992e-05, + "loss": 0.2549, + "step": 4356, + "task_loss": 0.1826838105916977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887387199654349, + "compression_loss": 0.0, + "distillation_loss": 0.10139012336730957, + "epoch": 4.14, + "learning_rate": 1.7972952568206402e-05, + "loss": 0.1041, + "step": 4357, + "task_loss": 0.12895506620407104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887470227028993, + "compression_loss": 0.0, + "distillation_loss": 0.19154085218906403, + "epoch": 4.14, + "learning_rate": 1.7962727540204827e-05, + "loss": 0.1773, + "step": 4358, + "task_loss": 0.04891242831945419 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887553213583888, + "compression_loss": 0.0, + "distillation_loss": 0.09380476176738739, + "epoch": 4.14, + "learning_rate": 1.7952503790557686e-05, + "loss": 0.097, + "step": 4359, + "task_loss": 0.1261102557182312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887636159329067, + "compression_loss": 0.0, + "distillation_loss": 0.10539199411869049, + "epoch": 4.14, + "learning_rate": 1.7942281321122168e-05, + "loss": 0.1145, + "step": 4360, + "task_loss": 0.19654574990272522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788771906427457, + "compression_loss": 0.0, + "distillation_loss": 0.021082423627376556, + "epoch": 4.14, + "learning_rate": 1.7932060133755245e-05, + "loss": 0.0256, + "step": 4361, + "task_loss": 0.06635300070047379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887801928430432, + "compression_loss": 0.0, + "distillation_loss": 0.020916704088449478, + "epoch": 4.14, + "learning_rate": 1.792184023031363e-05, + "loss": 0.0241, + "step": 4362, + "task_loss": 0.05260460451245308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887884751806692, + "compression_loss": 0.0, + "distillation_loss": 0.13232557475566864, + "epoch": 4.14, + "learning_rate": 1.7911621612653832e-05, + "loss": 0.1251, + "step": 4363, + "task_loss": 0.060476090759038925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7887967534413384, + "compression_loss": 0.0, + "distillation_loss": 0.1687774807214737, + "epoch": 4.14, + "learning_rate": 1.7901404282632105e-05, + "loss": 0.156, + "step": 4364, + "task_loss": 0.04108697548508644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888050276260546, + "compression_loss": 0.0, + "distillation_loss": 0.052245475351810455, + "epoch": 4.15, + "learning_rate": 1.7891188242104466e-05, + "loss": 0.0564, + "step": 4365, + "task_loss": 0.09406599402427673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888132977358217, + "compression_loss": 0.0, + "distillation_loss": 0.02484312653541565, + "epoch": 4.15, + "learning_rate": 1.7880973492926734e-05, + "loss": 0.0229, + "step": 4366, + "task_loss": 0.005430508404970169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788821563771643, + "compression_loss": 0.0, + "distillation_loss": 0.06918057799339294, + "epoch": 4.15, + "learning_rate": 1.7870760036954444e-05, + "loss": 0.0648, + "step": 4367, + "task_loss": 0.02556915022432804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888298257345224, + "compression_loss": 0.0, + "distillation_loss": 0.19346660375595093, + "epoch": 4.15, + "learning_rate": 1.786054787604294e-05, + "loss": 0.1922, + "step": 4368, + "task_loss": 0.18102970719337463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888380836254636, + "compression_loss": 0.0, + "distillation_loss": 0.1463518738746643, + "epoch": 4.15, + "learning_rate": 1.7850337012047287e-05, + "loss": 0.155, + "step": 4369, + "task_loss": 0.23283621668815613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888463374454703, + "compression_loss": 0.0, + "distillation_loss": 0.06679415702819824, + "epoch": 4.15, + "learning_rate": 1.784012744682235e-05, + "loss": 0.0708, + "step": 4370, + "task_loss": 0.10708191245794296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888545871955459, + "compression_loss": 0.0, + "distillation_loss": 0.015153437852859497, + "epoch": 4.15, + "learning_rate": 1.7829919182222752e-05, + "loss": 0.0192, + "step": 4371, + "task_loss": 0.0555480532348156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888628328766945, + "compression_loss": 0.0, + "distillation_loss": 0.04128699749708176, + "epoch": 4.15, + "learning_rate": 1.7819712220102857e-05, + "loss": 0.0446, + "step": 4372, + "task_loss": 0.07435785233974457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888710744899194, + "compression_loss": 0.0, + "distillation_loss": 0.07817722856998444, + "epoch": 4.15, + "learning_rate": 1.7809506562316818e-05, + "loss": 0.08, + "step": 4373, + "task_loss": 0.09686526656150818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888793120362246, + "compression_loss": 0.0, + "distillation_loss": 0.058174848556518555, + "epoch": 4.15, + "learning_rate": 1.7799302210718544e-05, + "loss": 0.0739, + "step": 4374, + "task_loss": 0.21509107947349548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888875455166136, + "compression_loss": 0.0, + "distillation_loss": 0.02214631997048855, + "epoch": 4.15, + "learning_rate": 1.7789099167161704e-05, + "loss": 0.0367, + "step": 4375, + "task_loss": 0.1679784506559372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7888957749320901, + "compression_loss": 0.0, + "distillation_loss": 0.03224210441112518, + "epoch": 4.16, + "learning_rate": 1.777889743349973e-05, + "loss": 0.0327, + "step": 4376, + "task_loss": 0.036665864288806915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889040002836578, + "compression_loss": 0.0, + "distillation_loss": 0.04349253326654434, + "epoch": 4.16, + "learning_rate": 1.776869701158581e-05, + "loss": 0.041, + "step": 4377, + "task_loss": 0.01853703148663044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889122215723204, + "compression_loss": 0.0, + "distillation_loss": 0.03350648283958435, + "epoch": 4.16, + "learning_rate": 1.775849790327291e-05, + "loss": 0.0312, + "step": 4378, + "task_loss": 0.010053610429167747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889204387990816, + "compression_loss": 0.0, + "distillation_loss": 0.11184386909008026, + "epoch": 4.16, + "learning_rate": 1.7748300110413737e-05, + "loss": 0.105, + "step": 4379, + "task_loss": 0.04385565221309662 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.788928651964945, + "compression_loss": 0.0, + "distillation_loss": 0.022888878360390663, + "epoch": 4.16, + "learning_rate": 1.7738103634860776e-05, + "loss": 0.0224, + "step": 4380, + "task_loss": 0.017647787928581238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889368610709144, + "compression_loss": 0.0, + "distillation_loss": 0.11289818584918976, + "epoch": 4.16, + "learning_rate": 1.7727908478466264e-05, + "loss": 0.1062, + "step": 4381, + "task_loss": 0.04638146981596947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889450661179934, + "compression_loss": 0.0, + "distillation_loss": 0.1492418795824051, + "epoch": 4.16, + "learning_rate": 1.771771464308219e-05, + "loss": 0.1528, + "step": 4382, + "task_loss": 0.18500253558158875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889532671071856, + "compression_loss": 0.0, + "distillation_loss": 0.13683563470840454, + "epoch": 4.16, + "learning_rate": 1.770752213056033e-05, + "loss": 0.137, + "step": 4383, + "task_loss": 0.13819673657417297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889614640394949, + "compression_loss": 0.0, + "distillation_loss": 0.02154296077787876, + "epoch": 4.16, + "learning_rate": 1.7697330942752193e-05, + "loss": 0.0206, + "step": 4384, + "task_loss": 0.012236261740326881 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889696569159248, + "compression_loss": 0.0, + "distillation_loss": 0.041471950709819794, + "epoch": 4.16, + "learning_rate": 1.768714108150907e-05, + "loss": 0.044, + "step": 4385, + "task_loss": 0.06637803465127945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889778457374791, + "compression_loss": 0.0, + "distillation_loss": 0.1052827313542366, + "epoch": 4.17, + "learning_rate": 1.767695254868198e-05, + "loss": 0.1006, + "step": 4386, + "task_loss": 0.057975780218839645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889860305051614, + "compression_loss": 0.0, + "distillation_loss": 0.0629846528172493, + "epoch": 4.17, + "learning_rate": 1.766676534612173e-05, + "loss": 0.0609, + "step": 4387, + "task_loss": 0.042560774832963943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7889942112199754, + "compression_loss": 0.0, + "distillation_loss": 0.039940863847732544, + "epoch": 4.17, + "learning_rate": 1.7656579475678876e-05, + "loss": 0.0388, + "step": 4388, + "task_loss": 0.028355613350868225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890023878829248, + "compression_loss": 0.0, + "distillation_loss": 0.03699345886707306, + "epoch": 4.17, + "learning_rate": 1.764639493920372e-05, + "loss": 0.041, + "step": 4389, + "task_loss": 0.07748576998710632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890105604950132, + "compression_loss": 0.0, + "distillation_loss": 0.03237525001168251, + "epoch": 4.17, + "learning_rate": 1.763621173854635e-05, + "loss": 0.0299, + "step": 4390, + "task_loss": 0.007551593706011772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890187290572445, + "compression_loss": 0.0, + "distillation_loss": 0.08855956047773361, + "epoch": 4.17, + "learning_rate": 1.762602987555656e-05, + "loss": 0.0878, + "step": 4391, + "task_loss": 0.08059325814247131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890268935706221, + "compression_loss": 0.0, + "distillation_loss": 0.04030374437570572, + "epoch": 4.17, + "learning_rate": 1.7615849352083975e-05, + "loss": 0.0561, + "step": 4392, + "task_loss": 0.1978634148836136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890350540361498, + "compression_loss": 0.0, + "distillation_loss": 0.021272670477628708, + "epoch": 4.17, + "learning_rate": 1.760567016997791e-05, + "loss": 0.0197, + "step": 4393, + "task_loss": 0.005058445036411285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890432104548314, + "compression_loss": 0.0, + "distillation_loss": 0.13198691606521606, + "epoch": 4.17, + "learning_rate": 1.760567016997791e-05, + "loss": 0.128, + "step": 4394, + "task_loss": 0.09192854911088943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890513628276704, + "compression_loss": 0.0, + "distillation_loss": 0.14407461881637573, + "epoch": 4.17, + "learning_rate": 1.7595492331087472e-05, + "loss": 0.1359, + "step": 4395, + "task_loss": 0.06224524974822998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890595111556706, + "compression_loss": 0.0, + "distillation_loss": 0.028794020414352417, + "epoch": 4.17, + "learning_rate": 1.7585315837261518e-05, + "loss": 0.0571, + "step": 4396, + "task_loss": 0.31160321831703186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890676554398357, + "compression_loss": 0.0, + "distillation_loss": 0.1819879114627838, + "epoch": 4.18, + "learning_rate": 1.7575140690348647e-05, + "loss": 0.1749, + "step": 4397, + "task_loss": 0.11067160218954086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890757956811694, + "compression_loss": 0.0, + "distillation_loss": 0.0241214781999588, + "epoch": 4.18, + "learning_rate": 1.756496689219723e-05, + "loss": 0.0231, + "step": 4398, + "task_loss": 0.013442834839224815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890839318806752, + "compression_loss": 0.0, + "distillation_loss": 0.0341639518737793, + "epoch": 4.18, + "learning_rate": 1.755479444465538e-05, + "loss": 0.0382, + "step": 4399, + "task_loss": 0.07429169118404388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7890920640393568, + "compression_loss": 0.0, + "distillation_loss": 0.06670385599136353, + "epoch": 4.18, + "learning_rate": 1.7544623349570973e-05, + "loss": 0.071, + "step": 4400, + "task_loss": 0.11016245931386948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891001921582181, + "compression_loss": 0.0, + "distillation_loss": 0.11859071999788284, + "epoch": 4.18, + "learning_rate": 1.7534453608791644e-05, + "loss": 0.1156, + "step": 4401, + "task_loss": 0.08839291334152222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891083162382626, + "compression_loss": 0.0, + "distillation_loss": 0.027910087257623672, + "epoch": 4.18, + "learning_rate": 1.7524285224164772e-05, + "loss": 0.0256, + "step": 4402, + "task_loss": 0.005291949957609177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891164362804941, + "compression_loss": 0.0, + "distillation_loss": 0.05852048099040985, + "epoch": 4.18, + "learning_rate": 1.7514118197537497e-05, + "loss": 0.0757, + "step": 4403, + "task_loss": 0.2301456481218338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891245522859162, + "compression_loss": 0.0, + "distillation_loss": 0.0825999528169632, + "epoch": 4.18, + "learning_rate": 1.75039525307567e-05, + "loss": 0.091, + "step": 4404, + "task_loss": 0.1666475236415863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891326642555326, + "compression_loss": 0.0, + "distillation_loss": 0.02013680338859558, + "epoch": 4.18, + "learning_rate": 1.7493788225669027e-05, + "loss": 0.0268, + "step": 4405, + "task_loss": 0.08626913279294968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789140772190347, + "compression_loss": 0.0, + "distillation_loss": 0.024234000593423843, + "epoch": 4.18, + "learning_rate": 1.7483625284120876e-05, + "loss": 0.031, + "step": 4406, + "task_loss": 0.09184201061725616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891488760913631, + "compression_loss": 0.0, + "distillation_loss": 0.11344337463378906, + "epoch": 4.19, + "learning_rate": 1.7473463707958388e-05, + "loss": 0.1076, + "step": 4407, + "task_loss": 0.055413682013750076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891569759595845, + "compression_loss": 0.0, + "distillation_loss": 0.021596118807792664, + "epoch": 4.19, + "learning_rate": 1.7463303499027466e-05, + "loss": 0.0199, + "step": 4408, + "task_loss": 0.004350002855062485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891650717960149, + "compression_loss": 0.0, + "distillation_loss": 0.03837398812174797, + "epoch": 4.19, + "learning_rate": 1.745314465917375e-05, + "loss": 0.0356, + "step": 4409, + "task_loss": 0.010302269831299782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789173163601658, + "compression_loss": 0.0, + "distillation_loss": 0.23183999955654144, + "epoch": 4.19, + "learning_rate": 1.7442987190242668e-05, + "loss": 0.2387, + "step": 4410, + "task_loss": 0.3002890646457672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891812513775176, + "compression_loss": 0.0, + "distillation_loss": 0.1880335658788681, + "epoch": 4.19, + "learning_rate": 1.7432831094079355e-05, + "loss": 0.1926, + "step": 4411, + "task_loss": 0.2336317002773285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891893351245972, + "compression_loss": 0.0, + "distillation_loss": 0.13378804922103882, + "epoch": 4.19, + "learning_rate": 1.7422676372528718e-05, + "loss": 0.13, + "step": 4412, + "task_loss": 0.09611691534519196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7891974148439006, + "compression_loss": 0.0, + "distillation_loss": 0.021651534363627434, + "epoch": 4.19, + "learning_rate": 1.7412523027435407e-05, + "loss": 0.0242, + "step": 4413, + "task_loss": 0.04686147719621658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892054905364314, + "compression_loss": 0.0, + "distillation_loss": 0.10245324671268463, + "epoch": 4.19, + "learning_rate": 1.740237106064383e-05, + "loss": 0.1, + "step": 4414, + "task_loss": 0.07831554114818573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892135622031934, + "compression_loss": 0.0, + "distillation_loss": 0.016463253647089005, + "epoch": 4.19, + "learning_rate": 1.7392220473998147e-05, + "loss": 0.0154, + "step": 4415, + "task_loss": 0.005847932770848274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892216298451902, + "compression_loss": 0.0, + "distillation_loss": 0.050443872809410095, + "epoch": 4.19, + "learning_rate": 1.738207126934225e-05, + "loss": 0.0581, + "step": 4416, + "task_loss": 0.12747488915920258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892296934634254, + "compression_loss": 0.0, + "distillation_loss": 0.11745072156190872, + "epoch": 4.19, + "learning_rate": 1.737192344851979e-05, + "loss": 0.112, + "step": 4417, + "task_loss": 0.06290709227323532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892377530589028, + "compression_loss": 0.0, + "distillation_loss": 0.041940174996852875, + "epoch": 4.2, + "learning_rate": 1.7361777013374173e-05, + "loss": 0.0392, + "step": 4418, + "task_loss": 0.01443542167544365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892458086326261, + "compression_loss": 0.0, + "distillation_loss": 0.18550318479537964, + "epoch": 4.2, + "learning_rate": 1.7351631965748555e-05, + "loss": 0.1769, + "step": 4419, + "task_loss": 0.09899017959833145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892538601855988, + "compression_loss": 0.0, + "distillation_loss": 0.02498285286128521, + "epoch": 4.2, + "learning_rate": 1.734148830748582e-05, + "loss": 0.023, + "step": 4420, + "task_loss": 0.0047489944845438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892619077188249, + "compression_loss": 0.0, + "distillation_loss": 0.02473044954240322, + "epoch": 4.2, + "learning_rate": 1.733134604042862e-05, + "loss": 0.0284, + "step": 4421, + "task_loss": 0.06133115291595459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892699512333078, + "compression_loss": 0.0, + "distillation_loss": 0.07032322883605957, + "epoch": 4.2, + "learning_rate": 1.7321205166419348e-05, + "loss": 0.0686, + "step": 4422, + "task_loss": 0.052621208131313324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892779907300513, + "compression_loss": 0.0, + "distillation_loss": 0.06657904386520386, + "epoch": 4.2, + "learning_rate": 1.7311065687300133e-05, + "loss": 0.0622, + "step": 4423, + "task_loss": 0.022893797606229782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789286026210059, + "compression_loss": 0.0, + "distillation_loss": 0.06309516727924347, + "epoch": 4.2, + "learning_rate": 1.730092760491287e-05, + "loss": 0.0599, + "step": 4424, + "task_loss": 0.031243909150362015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7892940576743348, + "compression_loss": 0.0, + "distillation_loss": 0.07031765580177307, + "epoch": 4.2, + "learning_rate": 1.729079092109919e-05, + "loss": 0.0728, + "step": 4425, + "task_loss": 0.09468785673379898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893020851238821, + "compression_loss": 0.0, + "distillation_loss": 0.2251608967781067, + "epoch": 4.2, + "learning_rate": 1.7280655637700456e-05, + "loss": 0.222, + "step": 4426, + "task_loss": 0.1932929903268814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893101085597048, + "compression_loss": 0.0, + "distillation_loss": 0.022628076374530792, + "epoch": 4.2, + "learning_rate": 1.7270521756557805e-05, + "loss": 0.0233, + "step": 4427, + "task_loss": 0.02976515144109726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893181279828064, + "compression_loss": 0.0, + "distillation_loss": 0.040559072047472, + "epoch": 4.21, + "learning_rate": 1.7260389279512106e-05, + "loss": 0.0407, + "step": 4428, + "task_loss": 0.041948944330215454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893261433941907, + "compression_loss": 0.0, + "distillation_loss": 0.02462448924779892, + "epoch": 4.21, + "learning_rate": 1.7250258208403974e-05, + "loss": 0.0236, + "step": 4429, + "task_loss": 0.014600781723856926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893341547948615, + "compression_loss": 0.0, + "distillation_loss": 0.02943100593984127, + "epoch": 4.21, + "learning_rate": 1.7240128545073753e-05, + "loss": 0.0367, + "step": 4430, + "task_loss": 0.10226895660161972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893421621858222, + "compression_loss": 0.0, + "distillation_loss": 0.23634879291057587, + "epoch": 4.21, + "learning_rate": 1.723000029136156e-05, + "loss": 0.244, + "step": 4431, + "task_loss": 0.3131251037120819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893501655680766, + "compression_loss": 0.0, + "distillation_loss": 0.14374813437461853, + "epoch": 4.21, + "learning_rate": 1.7219873449107233e-05, + "loss": 0.136, + "step": 4432, + "task_loss": 0.06602862477302551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893581649426284, + "compression_loss": 0.0, + "distillation_loss": 0.03242877870798111, + "epoch": 4.21, + "learning_rate": 1.7209748020150362e-05, + "loss": 0.0366, + "step": 4433, + "task_loss": 0.07460720837116241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893661603104813, + "compression_loss": 0.0, + "distillation_loss": 0.028279315680265427, + "epoch": 4.21, + "learning_rate": 1.719962400633028e-05, + "loss": 0.0334, + "step": 4434, + "task_loss": 0.07948870211839676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789374151672639, + "compression_loss": 0.0, + "distillation_loss": 0.021993128582835197, + "epoch": 4.21, + "learning_rate": 1.7189501409486062e-05, + "loss": 0.0263, + "step": 4435, + "task_loss": 0.06503792107105255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893821390301051, + "compression_loss": 0.0, + "distillation_loss": 0.08905947208404541, + "epoch": 4.21, + "learning_rate": 1.717938023145654e-05, + "loss": 0.103, + "step": 4436, + "task_loss": 0.22882232069969177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893901223838834, + "compression_loss": 0.0, + "distillation_loss": 0.02391093783080578, + "epoch": 4.21, + "learning_rate": 1.716926047408025e-05, + "loss": 0.022, + "step": 4437, + "task_loss": 0.004962872713804245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7893981017349775, + "compression_loss": 0.0, + "distillation_loss": 0.1042497307062149, + "epoch": 4.21, + "learning_rate": 1.7159142139195514e-05, + "loss": 0.1155, + "step": 4438, + "task_loss": 0.21651120483875275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894060770843911, + "compression_loss": 0.0, + "distillation_loss": 0.14275360107421875, + "epoch": 4.22, + "learning_rate": 1.7149025228640376e-05, + "loss": 0.1396, + "step": 4439, + "task_loss": 0.11126483976840973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894140484331279, + "compression_loss": 0.0, + "distillation_loss": 0.0388154536485672, + "epoch": 4.22, + "learning_rate": 1.7138909744252608e-05, + "loss": 0.0382, + "step": 4440, + "task_loss": 0.03281474858522415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894220157821915, + "compression_loss": 0.0, + "distillation_loss": 0.07178475707769394, + "epoch": 4.22, + "learning_rate": 1.712879568786975e-05, + "loss": 0.0693, + "step": 4441, + "task_loss": 0.046773068606853485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894299791325856, + "compression_loss": 0.0, + "distillation_loss": 0.02249164879322052, + "epoch": 4.22, + "learning_rate": 1.711868306132906e-05, + "loss": 0.0268, + "step": 4442, + "task_loss": 0.06524014472961426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789437938485314, + "compression_loss": 0.0, + "distillation_loss": 0.028957027941942215, + "epoch": 4.22, + "learning_rate": 1.7108571866467547e-05, + "loss": 0.0456, + "step": 4443, + "task_loss": 0.19559374451637268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894458938413803, + "compression_loss": 0.0, + "distillation_loss": 0.13259674608707428, + "epoch": 4.22, + "learning_rate": 1.709846210512196e-05, + "loss": 0.1353, + "step": 4444, + "task_loss": 0.15936444699764252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894538452017882, + "compression_loss": 0.0, + "distillation_loss": 0.18647846579551697, + "epoch": 4.22, + "learning_rate": 1.7088353779128784e-05, + "loss": 0.1902, + "step": 4445, + "task_loss": 0.22320255637168884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894617925675413, + "compression_loss": 0.0, + "distillation_loss": 0.12089197337627411, + "epoch": 4.22, + "learning_rate": 1.7078246890324257e-05, + "loss": 0.1152, + "step": 4446, + "task_loss": 0.06432140618562698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894697359396434, + "compression_loss": 0.0, + "distillation_loss": 0.016367292031645775, + "epoch": 4.22, + "learning_rate": 1.706814144054433e-05, + "loss": 0.0152, + "step": 4447, + "task_loss": 0.004441702738404274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894776753190981, + "compression_loss": 0.0, + "distillation_loss": 0.02612762153148651, + "epoch": 4.22, + "learning_rate": 1.705803743162471e-05, + "loss": 0.0238, + "step": 4448, + "task_loss": 0.0026707202196121216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894856107069091, + "compression_loss": 0.0, + "distillation_loss": 0.046527355909347534, + "epoch": 4.23, + "learning_rate": 1.704793486540084e-05, + "loss": 0.0495, + "step": 4449, + "task_loss": 0.07587733864784241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7894935421040802, + "compression_loss": 0.0, + "distillation_loss": 0.016864405944943428, + "epoch": 4.23, + "learning_rate": 1.7037833743707892e-05, + "loss": 0.0237, + "step": 4450, + "task_loss": 0.08481664955615997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895014695116149, + "compression_loss": 0.0, + "distillation_loss": 0.026964813470840454, + "epoch": 4.23, + "learning_rate": 1.7027734068380803e-05, + "loss": 0.0318, + "step": 4451, + "task_loss": 0.07501386851072311 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789509392930517, + "compression_loss": 0.0, + "distillation_loss": 0.0899241715669632, + "epoch": 4.23, + "learning_rate": 1.7017635841254194e-05, + "loss": 0.0956, + "step": 4452, + "task_loss": 0.14717119932174683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895173123617901, + "compression_loss": 0.0, + "distillation_loss": 0.07356803119182587, + "epoch": 4.23, + "learning_rate": 1.7007539064162498e-05, + "loss": 0.0707, + "step": 4453, + "task_loss": 0.04454321041703224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789525227806438, + "compression_loss": 0.0, + "distillation_loss": 0.09831836819648743, + "epoch": 4.23, + "learning_rate": 1.6997443738939815e-05, + "loss": 0.0933, + "step": 4454, + "task_loss": 0.04812869057059288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895331392654642, + "compression_loss": 0.0, + "distillation_loss": 0.036545686423778534, + "epoch": 4.23, + "learning_rate": 1.6987349867420024e-05, + "loss": 0.0339, + "step": 4455, + "task_loss": 0.010073903948068619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895410467398726, + "compression_loss": 0.0, + "distillation_loss": 0.15401822328567505, + "epoch": 4.23, + "learning_rate": 1.6977257451436712e-05, + "loss": 0.1472, + "step": 4456, + "task_loss": 0.08590954542160034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895489502306667, + "compression_loss": 0.0, + "distillation_loss": 0.14784236252307892, + "epoch": 4.23, + "learning_rate": 1.6967166492823226e-05, + "loss": 0.1489, + "step": 4457, + "task_loss": 0.15834221243858337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895568497388503, + "compression_loss": 0.0, + "distillation_loss": 0.09763270616531372, + "epoch": 4.23, + "learning_rate": 1.6957076993412636e-05, + "loss": 0.0944, + "step": 4458, + "task_loss": 0.06483699381351471 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789564745265427, + "compression_loss": 0.0, + "distillation_loss": 0.03840739279985428, + "epoch": 4.23, + "learning_rate": 1.694698895503774e-05, + "loss": 0.0356, + "step": 4459, + "task_loss": 0.010582242161035538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895726368114006, + "compression_loss": 0.0, + "distillation_loss": 0.04345633089542389, + "epoch": 4.24, + "learning_rate": 1.6936902379531082e-05, + "loss": 0.0488, + "step": 4460, + "task_loss": 0.09703336656093597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895805243777746, + "compression_loss": 0.0, + "distillation_loss": 0.0363914854824543, + "epoch": 4.24, + "learning_rate": 1.6926817268724938e-05, + "loss": 0.0399, + "step": 4461, + "task_loss": 0.07188586890697479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895884079655529, + "compression_loss": 0.0, + "distillation_loss": 0.14720529317855835, + "epoch": 4.24, + "learning_rate": 1.6916733624451324e-05, + "loss": 0.1541, + "step": 4462, + "task_loss": 0.2162376046180725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7895962875757389, + "compression_loss": 0.0, + "distillation_loss": 0.07019872963428497, + "epoch": 4.24, + "learning_rate": 1.690665144854198e-05, + "loss": 0.0753, + "step": 4463, + "task_loss": 0.12100522965192795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896041632093366, + "compression_loss": 0.0, + "distillation_loss": 0.03042607009410858, + "epoch": 4.24, + "learning_rate": 1.6896570742828367e-05, + "loss": 0.0286, + "step": 4464, + "task_loss": 0.012003440409898758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896120348673494, + "compression_loss": 0.0, + "distillation_loss": 0.10173506289720535, + "epoch": 4.24, + "learning_rate": 1.6886491509141717e-05, + "loss": 0.1217, + "step": 4465, + "task_loss": 0.3017996847629547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896199025507812, + "compression_loss": 0.0, + "distillation_loss": 0.04094619303941727, + "epoch": 4.24, + "learning_rate": 1.6876413749312954e-05, + "loss": 0.0476, + "step": 4466, + "task_loss": 0.10766998678445816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896277662606356, + "compression_loss": 0.0, + "distillation_loss": 0.019710058346390724, + "epoch": 4.24, + "learning_rate": 1.6866337465172754e-05, + "loss": 0.0182, + "step": 4467, + "task_loss": 0.004470134153962135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896356259979161, + "compression_loss": 0.0, + "distillation_loss": 0.04553668200969696, + "epoch": 4.24, + "learning_rate": 1.685626265855153e-05, + "loss": 0.0429, + "step": 4468, + "task_loss": 0.01903415471315384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896434817636268, + "compression_loss": 0.0, + "distillation_loss": 0.03563941270112991, + "epoch": 4.24, + "learning_rate": 1.6846189331279415e-05, + "loss": 0.0426, + "step": 4469, + "task_loss": 0.10529813170433044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789651333558771, + "compression_loss": 0.0, + "distillation_loss": 0.09041808545589447, + "epoch": 4.25, + "learning_rate": 1.683611748518627e-05, + "loss": 0.0915, + "step": 4470, + "task_loss": 0.10150431841611862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896591813843525, + "compression_loss": 0.0, + "distillation_loss": 0.031974878162145615, + "epoch": 4.25, + "learning_rate": 1.6826047122101703e-05, + "loss": 0.0362, + "step": 4471, + "task_loss": 0.07398272305727005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896670252413751, + "compression_loss": 0.0, + "distillation_loss": 0.03553183376789093, + "epoch": 4.25, + "learning_rate": 1.6815978243855052e-05, + "loss": 0.0332, + "step": 4472, + "task_loss": 0.012660248205065727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896748651308424, + "compression_loss": 0.0, + "distillation_loss": 0.1044113039970398, + "epoch": 4.25, + "learning_rate": 1.6805910852275358e-05, + "loss": 0.1011, + "step": 4473, + "task_loss": 0.07151272147893906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789682701053758, + "compression_loss": 0.0, + "distillation_loss": 0.1547268033027649, + "epoch": 4.25, + "learning_rate": 1.6795844949191426e-05, + "loss": 0.1734, + "step": 4474, + "task_loss": 0.34136468172073364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896905330111257, + "compression_loss": 0.0, + "distillation_loss": 0.07073305547237396, + "epoch": 4.25, + "learning_rate": 1.6785780536431772e-05, + "loss": 0.0803, + "step": 4475, + "task_loss": 0.16646496951580048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7896983610039491, + "compression_loss": 0.0, + "distillation_loss": 0.0397295281291008, + "epoch": 4.25, + "learning_rate": 1.677571761582464e-05, + "loss": 0.0411, + "step": 4476, + "task_loss": 0.05347587913274765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789706185033232, + "compression_loss": 0.0, + "distillation_loss": 0.04183419793844223, + "epoch": 4.25, + "learning_rate": 1.6765656189198013e-05, + "loss": 0.0536, + "step": 4477, + "task_loss": 0.1599850058555603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897140050999778, + "compression_loss": 0.0, + "distillation_loss": 0.10868053883314133, + "epoch": 4.25, + "learning_rate": 1.675559625837959e-05, + "loss": 0.1083, + "step": 4478, + "task_loss": 0.10476875305175781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897218212051905, + "compression_loss": 0.0, + "distillation_loss": 0.15036195516586304, + "epoch": 4.25, + "learning_rate": 1.6745537825196823e-05, + "loss": 0.144, + "step": 4479, + "task_loss": 0.08634737133979797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897296333498737, + "compression_loss": 0.0, + "distillation_loss": 0.12207113206386566, + "epoch": 4.25, + "learning_rate": 1.6735480891476855e-05, + "loss": 0.1236, + "step": 4480, + "task_loss": 0.13720379769802094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789737441535031, + "compression_loss": 0.0, + "distillation_loss": 0.030313938856124878, + "epoch": 4.26, + "learning_rate": 1.672542545904659e-05, + "loss": 0.0284, + "step": 4481, + "task_loss": 0.010765250772237778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897452457616662, + "compression_loss": 0.0, + "distillation_loss": 0.08106144517660141, + "epoch": 4.26, + "learning_rate": 1.6715371529732643e-05, + "loss": 0.1023, + "step": 4482, + "task_loss": 0.2930178940296173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897530460307828, + "compression_loss": 0.0, + "distillation_loss": 0.21419498324394226, + "epoch": 4.26, + "learning_rate": 1.6705319105361357e-05, + "loss": 0.2036, + "step": 4483, + "task_loss": 0.10846757143735886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897608423433846, + "compression_loss": 0.0, + "distillation_loss": 0.09862993657588959, + "epoch": 4.26, + "learning_rate": 1.6695268187758797e-05, + "loss": 0.1024, + "step": 4484, + "task_loss": 0.1367725133895874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897686347004753, + "compression_loss": 0.0, + "distillation_loss": 0.05723030865192413, + "epoch": 4.26, + "learning_rate": 1.6685218778750775e-05, + "loss": 0.0594, + "step": 4485, + "task_loss": 0.07874364405870438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897764231030585, + "compression_loss": 0.0, + "distillation_loss": 0.017719998955726624, + "epoch": 4.26, + "learning_rate": 1.66751708801628e-05, + "loss": 0.0165, + "step": 4486, + "task_loss": 0.005893906578421593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789784207552138, + "compression_loss": 0.0, + "distillation_loss": 0.06374501436948776, + "epoch": 4.26, + "learning_rate": 1.6665124493820123e-05, + "loss": 0.0772, + "step": 4487, + "task_loss": 0.19814634323120117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897919880487174, + "compression_loss": 0.0, + "distillation_loss": 0.05392298847436905, + "epoch": 4.26, + "learning_rate": 1.6655079621547727e-05, + "loss": 0.0499, + "step": 4488, + "task_loss": 0.013967299833893776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7897997645938004, + "compression_loss": 0.0, + "distillation_loss": 0.05942397564649582, + "epoch": 4.26, + "learning_rate": 1.6645036265170314e-05, + "loss": 0.0579, + "step": 4489, + "task_loss": 0.04388638213276863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898075371883907, + "compression_loss": 0.0, + "distillation_loss": 0.035995546728372574, + "epoch": 4.26, + "learning_rate": 1.6634994426512296e-05, + "loss": 0.0414, + "step": 4490, + "task_loss": 0.09044382721185684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898153058334919, + "compression_loss": 0.0, + "distillation_loss": 0.07521656155586243, + "epoch": 4.26, + "learning_rate": 1.662495410739783e-05, + "loss": 0.0794, + "step": 4491, + "task_loss": 0.11709287762641907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898230705301078, + "compression_loss": 0.0, + "distillation_loss": 0.04527838155627251, + "epoch": 4.27, + "learning_rate": 1.661491530965078e-05, + "loss": 0.0513, + "step": 4492, + "task_loss": 0.10566423833370209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789830831279242, + "compression_loss": 0.0, + "distillation_loss": 0.017616283148527145, + "epoch": 4.27, + "learning_rate": 1.660487803509475e-05, + "loss": 0.0164, + "step": 4493, + "task_loss": 0.0058811865746974945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898385880818982, + "compression_loss": 0.0, + "distillation_loss": 0.04209098592400551, + "epoch": 4.27, + "learning_rate": 1.6594842285553062e-05, + "loss": 0.0466, + "step": 4494, + "task_loss": 0.08670932054519653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898463409390801, + "compression_loss": 0.0, + "distillation_loss": 0.022504784166812897, + "epoch": 4.27, + "learning_rate": 1.6584808062848743e-05, + "loss": 0.0301, + "step": 4495, + "task_loss": 0.09832281619310379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898540898517915, + "compression_loss": 0.0, + "distillation_loss": 0.09364865720272064, + "epoch": 4.27, + "learning_rate": 1.6574775368804567e-05, + "loss": 0.0985, + "step": 4496, + "task_loss": 0.14257903397083282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898618348210358, + "compression_loss": 0.0, + "distillation_loss": 0.03564376011490822, + "epoch": 4.27, + "learning_rate": 1.656474420524302e-05, + "loss": 0.0466, + "step": 4497, + "task_loss": 0.14488337934017181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898695758478169, + "compression_loss": 0.0, + "distillation_loss": 0.0824960246682167, + "epoch": 4.27, + "learning_rate": 1.6554714573986324e-05, + "loss": 0.0832, + "step": 4498, + "task_loss": 0.08933614194393158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898773129331385, + "compression_loss": 0.0, + "distillation_loss": 0.14729326963424683, + "epoch": 4.27, + "learning_rate": 1.654468647685639e-05, + "loss": 0.1449, + "step": 4499, + "task_loss": 0.12342742830514908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.789885046078004, + "compression_loss": 0.0, + "distillation_loss": 0.02807687409222126, + "epoch": 4.27, + "learning_rate": 1.6534659915674882e-05, + "loss": 0.0259, + "step": 4500, + "task_loss": 0.006013935431838036 + }, + { + "epoch": 4.27, + "eval_accuracy": 0.8979357798165137, + "eval_loss": 0.482150673866272, + "eval_runtime": 18.0094, + "eval_samples_per_second": 48.419, + "eval_steps_per_second": 6.052, + "step": 4500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7898927752834174, + "compression_loss": 0.0, + "distillation_loss": 0.05030520260334015, + "epoch": 4.27, + "learning_rate": 1.6524634892263176e-05, + "loss": 0.049, + "step": 4501, + "task_loss": 0.0376378558576107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899005005503823, + "compression_loss": 0.0, + "distillation_loss": 0.10964888334274292, + "epoch": 4.28, + "learning_rate": 1.651461140844235e-05, + "loss": 0.1056, + "step": 4502, + "task_loss": 0.0688520222902298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899082218799023, + "compression_loss": 0.0, + "distillation_loss": 0.19415023922920227, + "epoch": 4.28, + "learning_rate": 1.6504589466033226e-05, + "loss": 0.1881, + "step": 4503, + "task_loss": 0.133355051279068 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899159392729812, + "compression_loss": 0.0, + "distillation_loss": 0.021605759859085083, + "epoch": 4.28, + "learning_rate": 1.6494569066856343e-05, + "loss": 0.0294, + "step": 4504, + "task_loss": 0.09938529133796692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899236527306225, + "compression_loss": 0.0, + "distillation_loss": 0.025142306461930275, + "epoch": 4.28, + "learning_rate": 1.6484550212731953e-05, + "loss": 0.0293, + "step": 4505, + "task_loss": 0.06672917306423187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899313622538301, + "compression_loss": 0.0, + "distillation_loss": 0.07510918378829956, + "epoch": 4.28, + "learning_rate": 1.6474532905480027e-05, + "loss": 0.0898, + "step": 4506, + "task_loss": 0.22238320112228394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899390678436075, + "compression_loss": 0.0, + "distillation_loss": 0.06910199671983719, + "epoch": 4.28, + "learning_rate": 1.6464517146920255e-05, + "loss": 0.0727, + "step": 4507, + "task_loss": 0.10474320501089096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899467695009585, + "compression_loss": 0.0, + "distillation_loss": 0.023270685225725174, + "epoch": 4.28, + "learning_rate": 1.645450293887206e-05, + "loss": 0.0214, + "step": 4508, + "task_loss": 0.004897216334939003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899544672268868, + "compression_loss": 0.0, + "distillation_loss": 0.01756512001156807, + "epoch": 4.28, + "learning_rate": 1.6444490283154557e-05, + "loss": 0.0164, + "step": 4509, + "task_loss": 0.006135221570730209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899621610223959, + "compression_loss": 0.0, + "distillation_loss": 0.05131693556904793, + "epoch": 4.28, + "learning_rate": 1.6434479181586594e-05, + "loss": 0.067, + "step": 4510, + "task_loss": 0.20782402157783508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899698508884897, + "compression_loss": 0.0, + "distillation_loss": 0.024594193324446678, + "epoch": 4.28, + "learning_rate": 1.6424469635986744e-05, + "loss": 0.0271, + "step": 4511, + "task_loss": 0.04964712634682655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899775368261717, + "compression_loss": 0.0, + "distillation_loss": 0.0458107516169548, + "epoch": 4.28, + "learning_rate": 1.6414461648173284e-05, + "loss": 0.0431, + "step": 4512, + "task_loss": 0.019116627052426338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899852188364457, + "compression_loss": 0.0, + "distillation_loss": 0.028601201251149178, + "epoch": 4.29, + "learning_rate": 1.6404455219964203e-05, + "loss": 0.0261, + "step": 4513, + "task_loss": 0.0036617927253246307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7899928969203154, + "compression_loss": 0.0, + "distillation_loss": 0.07332905381917953, + "epoch": 4.29, + "learning_rate": 1.6394450353177242e-05, + "loss": 0.0782, + "step": 4514, + "task_loss": 0.12209247052669525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900005710787844, + "compression_loss": 0.0, + "distillation_loss": 0.051219768822193146, + "epoch": 4.29, + "learning_rate": 1.6384447049629816e-05, + "loss": 0.0545, + "step": 4515, + "task_loss": 0.08445730060338974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900082413128564, + "compression_loss": 0.0, + "distillation_loss": 0.12552213668823242, + "epoch": 4.29, + "learning_rate": 1.6374445311139074e-05, + "loss": 0.1197, + "step": 4516, + "task_loss": 0.06750532984733582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900159076235351, + "compression_loss": 0.0, + "distillation_loss": 0.231977179646492, + "epoch": 4.29, + "learning_rate": 1.6364445139521883e-05, + "loss": 0.2244, + "step": 4517, + "task_loss": 0.15585389733314514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900235700118242, + "compression_loss": 0.0, + "distillation_loss": 0.027736982330679893, + "epoch": 4.29, + "learning_rate": 1.635444653659483e-05, + "loss": 0.0257, + "step": 4518, + "task_loss": 0.007768923416733742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900312284787274, + "compression_loss": 0.0, + "distillation_loss": 0.050665318965911865, + "epoch": 4.29, + "learning_rate": 1.6344449504174193e-05, + "loss": 0.0625, + "step": 4519, + "task_loss": 0.16943272948265076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900388830252483, + "compression_loss": 0.0, + "distillation_loss": 0.03559410944581032, + "epoch": 4.29, + "learning_rate": 1.6334454044075988e-05, + "loss": 0.0405, + "step": 4520, + "task_loss": 0.08508558571338654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900465336523906, + "compression_loss": 0.0, + "distillation_loss": 0.05828773230314255, + "epoch": 4.29, + "learning_rate": 1.6324460158115942e-05, + "loss": 0.0552, + "step": 4521, + "task_loss": 0.027413969859480858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900541803611582, + "compression_loss": 0.0, + "distillation_loss": 0.2908591628074646, + "epoch": 4.29, + "learning_rate": 1.6314467848109483e-05, + "loss": 0.2765, + "step": 4522, + "task_loss": 0.14718219637870789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900618231525544, + "compression_loss": 0.0, + "distillation_loss": 0.013387423008680344, + "epoch": 4.3, + "learning_rate": 1.6304477115871776e-05, + "loss": 0.0194, + "step": 4523, + "task_loss": 0.07351858913898468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900694620275831, + "compression_loss": 0.0, + "distillation_loss": 0.13720417022705078, + "epoch": 4.3, + "learning_rate": 1.6294487963217677e-05, + "loss": 0.129, + "step": 4524, + "task_loss": 0.05525549128651619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900770969872479, + "compression_loss": 0.0, + "distillation_loss": 0.024422302842140198, + "epoch": 4.3, + "learning_rate": 1.6284500391961772e-05, + "loss": 0.0328, + "step": 4525, + "task_loss": 0.10812153667211533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900847280325526, + "compression_loss": 0.0, + "distillation_loss": 0.13294796645641327, + "epoch": 4.3, + "learning_rate": 1.627451440391834e-05, + "loss": 0.1583, + "step": 4526, + "task_loss": 0.38683199882507324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900923551645008, + "compression_loss": 0.0, + "distillation_loss": 0.025384480133652687, + "epoch": 4.3, + "learning_rate": 1.626453000090139e-05, + "loss": 0.0332, + "step": 4527, + "task_loss": 0.10327724367380142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7900999783840963, + "compression_loss": 0.0, + "distillation_loss": 0.1557433307170868, + "epoch": 4.3, + "learning_rate": 1.625454718472464e-05, + "loss": 0.1661, + "step": 4528, + "task_loss": 0.2594253718852997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901075976923425, + "compression_loss": 0.0, + "distillation_loss": 0.07799336314201355, + "epoch": 4.3, + "learning_rate": 1.6244565957201506e-05, + "loss": 0.0748, + "step": 4529, + "task_loss": 0.04625723510980606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901152130902435, + "compression_loss": 0.0, + "distillation_loss": 0.03408731520175934, + "epoch": 4.3, + "learning_rate": 1.6234586320145125e-05, + "loss": 0.0314, + "step": 4530, + "task_loss": 0.007508426904678345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901228245788026, + "compression_loss": 0.0, + "distillation_loss": 0.023436367511749268, + "epoch": 4.3, + "learning_rate": 1.6224608275368364e-05, + "loss": 0.0238, + "step": 4531, + "task_loss": 0.026913581416010857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901304321590236, + "compression_loss": 0.0, + "distillation_loss": 0.02828259766101837, + "epoch": 4.3, + "learning_rate": 1.6214631824683773e-05, + "loss": 0.026, + "step": 4532, + "task_loss": 0.005199538543820381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901380358319102, + "compression_loss": 0.0, + "distillation_loss": 0.02083125337958336, + "epoch": 4.3, + "learning_rate": 1.6204656969903618e-05, + "loss": 0.0344, + "step": 4533, + "task_loss": 0.15678700804710388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901456355984662, + "compression_loss": 0.0, + "distillation_loss": 0.023461364209651947, + "epoch": 4.31, + "learning_rate": 1.6194683712839885e-05, + "loss": 0.0259, + "step": 4534, + "task_loss": 0.048007965087890625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901532314596952, + "compression_loss": 0.0, + "distillation_loss": 0.03079340234398842, + "epoch": 4.31, + "learning_rate": 1.618471205530427e-05, + "loss": 0.0345, + "step": 4535, + "task_loss": 0.0679367259144783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901608234166008, + "compression_loss": 0.0, + "distillation_loss": 0.034932032227516174, + "epoch": 4.31, + "learning_rate": 1.6174741999108157e-05, + "loss": 0.0328, + "step": 4536, + "task_loss": 0.013840943574905396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901684114701867, + "compression_loss": 0.0, + "distillation_loss": 0.09038272500038147, + "epoch": 4.31, + "learning_rate": 1.6164773546062667e-05, + "loss": 0.0854, + "step": 4537, + "task_loss": 0.04105079919099808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901759956214566, + "compression_loss": 0.0, + "distillation_loss": 0.056141406297683716, + "epoch": 4.31, + "learning_rate": 1.6154806697978608e-05, + "loss": 0.0534, + "step": 4538, + "task_loss": 0.028462648391723633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901835758714143, + "compression_loss": 0.0, + "distillation_loss": 0.02680385112762451, + "epoch": 4.31, + "learning_rate": 1.614484145666651e-05, + "loss": 0.0247, + "step": 4539, + "task_loss": 0.005452977493405342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901911522210633, + "compression_loss": 0.0, + "distillation_loss": 0.036854278296232224, + "epoch": 4.31, + "learning_rate": 1.613487782393661e-05, + "loss": 0.0338, + "step": 4540, + "task_loss": 0.006693465635180473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7901987246714074, + "compression_loss": 0.0, + "distillation_loss": 0.019134201109409332, + "epoch": 4.31, + "learning_rate": 1.6124915801598852e-05, + "loss": 0.0225, + "step": 4541, + "task_loss": 0.05329040437936783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902062932234503, + "compression_loss": 0.0, + "distillation_loss": 0.2091977298259735, + "epoch": 4.31, + "learning_rate": 1.6114955391462878e-05, + "loss": 0.2147, + "step": 4542, + "task_loss": 0.2644974887371063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902138578781955, + "compression_loss": 0.0, + "distillation_loss": 0.07729081809520721, + "epoch": 4.31, + "learning_rate": 1.6104996595338047e-05, + "loss": 0.0728, + "step": 4543, + "task_loss": 0.03260170668363571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790221418636647, + "compression_loss": 0.0, + "distillation_loss": 0.1044703871011734, + "epoch": 4.32, + "learning_rate": 1.609503941503343e-05, + "loss": 0.107, + "step": 4544, + "task_loss": 0.12950289249420166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902289754998082, + "compression_loss": 0.0, + "distillation_loss": 0.03879034146666527, + "epoch": 4.32, + "learning_rate": 1.6085083852357786e-05, + "loss": 0.0542, + "step": 4545, + "task_loss": 0.1928063929080963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902365284686829, + "compression_loss": 0.0, + "distillation_loss": 0.13962599635124207, + "epoch": 4.32, + "learning_rate": 1.6075129909119592e-05, + "loss": 0.1528, + "step": 4546, + "task_loss": 0.27165764570236206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902440775442747, + "compression_loss": 0.0, + "distillation_loss": 0.05956469848752022, + "epoch": 4.32, + "learning_rate": 1.6065177587127027e-05, + "loss": 0.0687, + "step": 4547, + "task_loss": 0.15051259100437164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902516227275874, + "compression_loss": 0.0, + "distillation_loss": 0.022979095578193665, + "epoch": 4.32, + "learning_rate": 1.6055226888188e-05, + "loss": 0.0238, + "step": 4548, + "task_loss": 0.030920779332518578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902591640196246, + "compression_loss": 0.0, + "distillation_loss": 0.015515048988163471, + "epoch": 4.32, + "learning_rate": 1.604527781411008e-05, + "loss": 0.0215, + "step": 4549, + "task_loss": 0.07502803206443787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79026670142139, + "compression_loss": 0.0, + "distillation_loss": 0.025966979563236237, + "epoch": 4.32, + "learning_rate": 1.6035330366700567e-05, + "loss": 0.039, + "step": 4550, + "task_loss": 0.15588192641735077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902742349338874, + "compression_loss": 0.0, + "distillation_loss": 0.011777522042393684, + "epoch": 4.32, + "learning_rate": 1.6025384547766477e-05, + "loss": 0.011, + "step": 4551, + "task_loss": 0.003892483189702034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902817645581203, + "compression_loss": 0.0, + "distillation_loss": 0.018658041954040527, + "epoch": 4.32, + "learning_rate": 1.6015440359114497e-05, + "loss": 0.0173, + "step": 4552, + "task_loss": 0.004857182502746582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902892902950923, + "compression_loss": 0.0, + "distillation_loss": 0.052327413111925125, + "epoch": 4.32, + "learning_rate": 1.600549780255105e-05, + "loss": 0.0637, + "step": 4553, + "task_loss": 0.1657162308692932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7902968121458074, + "compression_loss": 0.0, + "distillation_loss": 0.22867000102996826, + "epoch": 4.32, + "learning_rate": 1.5995556879882246e-05, + "loss": 0.2293, + "step": 4554, + "task_loss": 0.23492863774299622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903043301112691, + "compression_loss": 0.0, + "distillation_loss": 0.014244308695197105, + "epoch": 4.33, + "learning_rate": 1.59856175929139e-05, + "loss": 0.0134, + "step": 4555, + "task_loss": 0.006260443478822708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903118441924811, + "compression_loss": 0.0, + "distillation_loss": 0.06026815250515938, + "epoch": 4.33, + "learning_rate": 1.597567994345152e-05, + "loss": 0.0547, + "step": 4556, + "task_loss": 0.004877146333456039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903193543904471, + "compression_loss": 0.0, + "distillation_loss": 0.04478321969509125, + "epoch": 4.33, + "learning_rate": 1.5965743933300352e-05, + "loss": 0.0435, + "step": 4557, + "task_loss": 0.03176284581422806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903268607061708, + "compression_loss": 0.0, + "distillation_loss": 0.044730305671691895, + "epoch": 4.33, + "learning_rate": 1.595580956426531e-05, + "loss": 0.0555, + "step": 4558, + "task_loss": 0.1524236649274826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903343631406556, + "compression_loss": 0.0, + "distillation_loss": 0.022832248359918594, + "epoch": 4.33, + "learning_rate": 1.5945876838151014e-05, + "loss": 0.0285, + "step": 4559, + "task_loss": 0.07918474823236465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903418616949057, + "compression_loss": 0.0, + "distillation_loss": 0.13835708796977997, + "epoch": 4.33, + "learning_rate": 1.5935945756761794e-05, + "loss": 0.1415, + "step": 4560, + "task_loss": 0.17012692987918854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903493563699244, + "compression_loss": 0.0, + "distillation_loss": 0.06880520284175873, + "epoch": 4.33, + "learning_rate": 1.592601632190169e-05, + "loss": 0.0639, + "step": 4561, + "task_loss": 0.019452493637800217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903568471667155, + "compression_loss": 0.0, + "distillation_loss": 0.046133361756801605, + "epoch": 4.33, + "learning_rate": 1.591608853537441e-05, + "loss": 0.0463, + "step": 4562, + "task_loss": 0.04781344532966614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903643340862827, + "compression_loss": 0.0, + "distillation_loss": 0.038969479501247406, + "epoch": 4.33, + "learning_rate": 1.5906162398983397e-05, + "loss": 0.05, + "step": 4563, + "task_loss": 0.14975658059120178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903718171296297, + "compression_loss": 0.0, + "distillation_loss": 0.07891889661550522, + "epoch": 4.33, + "learning_rate": 1.5896237914531793e-05, + "loss": 0.0827, + "step": 4564, + "task_loss": 0.1163320243358612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79037929629776, + "compression_loss": 0.0, + "distillation_loss": 0.014497898519039154, + "epoch": 4.34, + "learning_rate": 1.58863150838224e-05, + "loss": 0.0135, + "step": 4565, + "task_loss": 0.004507582634687424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903867715916776, + "compression_loss": 0.0, + "distillation_loss": 0.07853427529335022, + "epoch": 4.34, + "learning_rate": 1.5876393908657766e-05, + "loss": 0.0769, + "step": 4566, + "task_loss": 0.06254800409078598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7903942430123858, + "compression_loss": 0.0, + "distillation_loss": 0.04548817127943039, + "epoch": 4.34, + "learning_rate": 1.5866474390840125e-05, + "loss": 0.0488, + "step": 4567, + "task_loss": 0.07903194427490234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904017105608887, + "compression_loss": 0.0, + "distillation_loss": 0.01196481566876173, + "epoch": 4.34, + "learning_rate": 1.58565565321714e-05, + "loss": 0.0192, + "step": 4568, + "task_loss": 0.0844261422753334 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904091742381896, + "compression_loss": 0.0, + "distillation_loss": 0.0883462056517601, + "epoch": 4.34, + "learning_rate": 1.584664033445321e-05, + "loss": 0.1073, + "step": 4569, + "task_loss": 0.27824994921684265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904166340452924, + "compression_loss": 0.0, + "distillation_loss": 0.06949096918106079, + "epoch": 4.34, + "learning_rate": 1.583672579948689e-05, + "loss": 0.0762, + "step": 4570, + "task_loss": 0.13672670722007751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904240899832008, + "compression_loss": 0.0, + "distillation_loss": 0.017165692523121834, + "epoch": 4.34, + "learning_rate": 1.582681292907346e-05, + "loss": 0.0189, + "step": 4571, + "task_loss": 0.03450107201933861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904315420529183, + "compression_loss": 0.0, + "distillation_loss": 0.028937380760908127, + "epoch": 4.34, + "learning_rate": 1.581690172501364e-05, + "loss": 0.0334, + "step": 4572, + "task_loss": 0.07330554723739624 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904389902554487, + "compression_loss": 0.0, + "distillation_loss": 0.04542282223701477, + "epoch": 4.34, + "learning_rate": 1.5806992189107838e-05, + "loss": 0.0445, + "step": 4573, + "task_loss": 0.036514170467853546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904464345917958, + "compression_loss": 0.0, + "distillation_loss": 0.11212094128131866, + "epoch": 4.34, + "learning_rate": 1.5797084323156186e-05, + "loss": 0.1122, + "step": 4574, + "task_loss": 0.11315297335386276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790453875062963, + "compression_loss": 0.0, + "distillation_loss": 0.09616538882255554, + "epoch": 4.34, + "learning_rate": 1.5787178128958496e-05, + "loss": 0.0895, + "step": 4575, + "task_loss": 0.029983239248394966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904613116699543, + "compression_loss": 0.0, + "distillation_loss": 0.03785378485918045, + "epoch": 4.35, + "learning_rate": 1.577727360831426e-05, + "loss": 0.0441, + "step": 4576, + "task_loss": 0.1002790629863739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904687444137731, + "compression_loss": 0.0, + "distillation_loss": 0.024910183623433113, + "epoch": 4.35, + "learning_rate": 1.5767370763022694e-05, + "loss": 0.0315, + "step": 4577, + "task_loss": 0.09050406515598297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904761732954233, + "compression_loss": 0.0, + "distillation_loss": 0.07750061899423599, + "epoch": 4.35, + "learning_rate": 1.5757469594882692e-05, + "loss": 0.0824, + "step": 4578, + "task_loss": 0.12608473002910614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904835983159084, + "compression_loss": 0.0, + "distillation_loss": 0.03907743841409683, + "epoch": 4.35, + "learning_rate": 1.574757010569285e-05, + "loss": 0.0363, + "step": 4579, + "task_loss": 0.011346584185957909 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904910194762322, + "compression_loss": 0.0, + "distillation_loss": 0.04754206910729408, + "epoch": 4.35, + "learning_rate": 1.5737672297251464e-05, + "loss": 0.0501, + "step": 4580, + "task_loss": 0.07262822985649109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7904984367773984, + "compression_loss": 0.0, + "distillation_loss": 0.07429815083742142, + "epoch": 4.35, + "learning_rate": 1.5727776171356506e-05, + "loss": 0.0701, + "step": 4581, + "task_loss": 0.032718729227781296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905058502204106, + "compression_loss": 0.0, + "distillation_loss": 0.10426107048988342, + "epoch": 4.35, + "learning_rate": 1.571788172980566e-05, + "loss": 0.1008, + "step": 4582, + "task_loss": 0.06919807940721512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905132598062725, + "compression_loss": 0.0, + "distillation_loss": 0.21805521845817566, + "epoch": 4.35, + "learning_rate": 1.5707988974396304e-05, + "loss": 0.2072, + "step": 4583, + "task_loss": 0.10992544889450073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905206655359879, + "compression_loss": 0.0, + "distillation_loss": 0.10667760670185089, + "epoch": 4.35, + "learning_rate": 1.56980979069255e-05, + "loss": 0.1054, + "step": 4584, + "task_loss": 0.09397716075181961 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905280674105603, + "compression_loss": 0.0, + "distillation_loss": 0.01430472545325756, + "epoch": 4.35, + "learning_rate": 1.568820852919002e-05, + "loss": 0.0183, + "step": 4585, + "task_loss": 0.05472009256482124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905354654309934, + "compression_loss": 0.0, + "distillation_loss": 0.037967242300510406, + "epoch": 4.36, + "learning_rate": 1.5678320842986295e-05, + "loss": 0.0446, + "step": 4586, + "task_loss": 0.10416756570339203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905428595982911, + "compression_loss": 0.0, + "distillation_loss": 0.07452750205993652, + "epoch": 4.36, + "learning_rate": 1.5668434850110493e-05, + "loss": 0.0828, + "step": 4587, + "task_loss": 0.1572713553905487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905502499134568, + "compression_loss": 0.0, + "distillation_loss": 0.018898198381066322, + "epoch": 4.36, + "learning_rate": 1.565855055235843e-05, + "loss": 0.0174, + "step": 4588, + "task_loss": 0.004319200292229652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905576363774943, + "compression_loss": 0.0, + "distillation_loss": 0.017305273562669754, + "epoch": 4.36, + "learning_rate": 1.5648667951525653e-05, + "loss": 0.0243, + "step": 4589, + "task_loss": 0.08709577471017838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905650189914074, + "compression_loss": 0.0, + "distillation_loss": 0.02825682982802391, + "epoch": 4.36, + "learning_rate": 1.5638787049407382e-05, + "loss": 0.035, + "step": 4590, + "task_loss": 0.09588130563497543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905723977561996, + "compression_loss": 0.0, + "distillation_loss": 0.03142907842993736, + "epoch": 4.36, + "learning_rate": 1.5628907847798517e-05, + "loss": 0.036, + "step": 4591, + "task_loss": 0.07705745846033096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905797726728746, + "compression_loss": 0.0, + "distillation_loss": 0.040951065719127655, + "epoch": 4.36, + "learning_rate": 1.5619030348493684e-05, + "loss": 0.0562, + "step": 4592, + "task_loss": 0.19311881065368652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905871437424362, + "compression_loss": 0.0, + "distillation_loss": 0.023970093578100204, + "epoch": 4.36, + "learning_rate": 1.5609154553287163e-05, + "loss": 0.032, + "step": 4593, + "task_loss": 0.10465425252914429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7905945109658881, + "compression_loss": 0.0, + "distillation_loss": 0.03335358202457428, + "epoch": 4.36, + "learning_rate": 1.5599280463972953e-05, + "loss": 0.0312, + "step": 4594, + "task_loss": 0.011852225288748741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906018743442338, + "compression_loss": 0.0, + "distillation_loss": 0.10270802676677704, + "epoch": 4.36, + "learning_rate": 1.558940808234471e-05, + "loss": 0.1207, + "step": 4595, + "task_loss": 0.2826935052871704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790609233878477, + "compression_loss": 0.0, + "distillation_loss": 0.11305034160614014, + "epoch": 4.36, + "learning_rate": 1.5579537410195817e-05, + "loss": 0.1194, + "step": 4596, + "task_loss": 0.1764708012342453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906165895696217, + "compression_loss": 0.0, + "distillation_loss": 0.02198610082268715, + "epoch": 4.37, + "learning_rate": 1.5569668449319323e-05, + "loss": 0.0204, + "step": 4597, + "task_loss": 0.005980312824249268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906239414186712, + "compression_loss": 0.0, + "distillation_loss": 0.03549554944038391, + "epoch": 4.37, + "learning_rate": 1.5559801201507968e-05, + "loss": 0.0526, + "step": 4598, + "task_loss": 0.20651255548000336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906312894266293, + "compression_loss": 0.0, + "distillation_loss": 0.09966300427913666, + "epoch": 4.37, + "learning_rate": 1.554993566855418e-05, + "loss": 0.1053, + "step": 4599, + "task_loss": 0.156356543302536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906386335944998, + "compression_loss": 0.0, + "distillation_loss": 0.0843530148267746, + "epoch": 4.37, + "learning_rate": 1.5540071852250106e-05, + "loss": 0.0909, + "step": 4600, + "task_loss": 0.14952674508094788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906459739232862, + "compression_loss": 0.0, + "distillation_loss": 0.08812104910612106, + "epoch": 4.37, + "learning_rate": 1.5530209754387537e-05, + "loss": 0.0843, + "step": 4601, + "task_loss": 0.04979713633656502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906533104139923, + "compression_loss": 0.0, + "distillation_loss": 0.0816584974527359, + "epoch": 4.37, + "learning_rate": 1.552034937675797e-05, + "loss": 0.0981, + "step": 4602, + "task_loss": 0.24617856740951538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906606430676218, + "compression_loss": 0.0, + "distillation_loss": 0.022141676396131516, + "epoch": 4.37, + "learning_rate": 1.5510490721152592e-05, + "loss": 0.0206, + "step": 4603, + "task_loss": 0.006470389664173126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906679718851783, + "compression_loss": 0.0, + "distillation_loss": 0.039304304867982864, + "epoch": 4.37, + "learning_rate": 1.550063378936228e-05, + "loss": 0.0401, + "step": 4604, + "task_loss": 0.047406185418367386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906752968676656, + "compression_loss": 0.0, + "distillation_loss": 0.03900455683469772, + "epoch": 4.37, + "learning_rate": 1.549077858317759e-05, + "loss": 0.0587, + "step": 4605, + "task_loss": 0.23572197556495667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906826180160873, + "compression_loss": 0.0, + "distillation_loss": 0.01960546150803566, + "epoch": 4.37, + "learning_rate": 1.5480925104388762e-05, + "loss": 0.0222, + "step": 4606, + "task_loss": 0.045594025403261185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790689935331447, + "compression_loss": 0.0, + "distillation_loss": 0.011018482968211174, + "epoch": 4.38, + "learning_rate": 1.547107335478574e-05, + "loss": 0.0162, + "step": 4607, + "task_loss": 0.06283082067966461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7906972488147486, + "compression_loss": 0.0, + "distillation_loss": 0.13586799800395966, + "epoch": 4.38, + "learning_rate": 1.5461223336158127e-05, + "loss": 0.1389, + "step": 4608, + "task_loss": 0.1658676415681839 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907045584669955, + "compression_loss": 0.0, + "distillation_loss": 0.061904579401016235, + "epoch": 4.38, + "learning_rate": 1.5451375050295235e-05, + "loss": 0.0694, + "step": 4609, + "task_loss": 0.1364220827817917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907118642891916, + "compression_loss": 0.0, + "distillation_loss": 0.06705223023891449, + "epoch": 4.38, + "learning_rate": 1.5441528498986053e-05, + "loss": 0.0632, + "step": 4610, + "task_loss": 0.028953341767191887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907191662823405, + "compression_loss": 0.0, + "distillation_loss": 0.06535607576370239, + "epoch": 4.38, + "learning_rate": 1.543168368401926e-05, + "loss": 0.0687, + "step": 4611, + "task_loss": 0.09915080666542053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907264644474459, + "compression_loss": 0.0, + "distillation_loss": 0.041197728365659714, + "epoch": 4.38, + "learning_rate": 1.5421840607183203e-05, + "loss": 0.0381, + "step": 4612, + "task_loss": 0.009746959432959557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907337587855114, + "compression_loss": 0.0, + "distillation_loss": 0.009639294818043709, + "epoch": 4.38, + "learning_rate": 1.5411999270265924e-05, + "loss": 0.0091, + "step": 4613, + "task_loss": 0.004122687503695488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790741049297541, + "compression_loss": 0.0, + "distillation_loss": 0.04923541843891144, + "epoch": 4.38, + "learning_rate": 1.5402159675055166e-05, + "loss": 0.0499, + "step": 4614, + "task_loss": 0.05560116469860077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790748335984538, + "compression_loss": 0.0, + "distillation_loss": 0.062305059283971786, + "epoch": 4.38, + "learning_rate": 1.5392321823338318e-05, + "loss": 0.0646, + "step": 4615, + "task_loss": 0.08488155901432037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907556188475061, + "compression_loss": 0.0, + "distillation_loss": 0.08541512489318848, + "epoch": 4.38, + "learning_rate": 1.5382485716902486e-05, + "loss": 0.0906, + "step": 4616, + "task_loss": 0.13751107454299927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907628978874492, + "compression_loss": 0.0, + "distillation_loss": 0.01606060564517975, + "epoch": 4.38, + "learning_rate": 1.537265135753443e-05, + "loss": 0.0216, + "step": 4617, + "task_loss": 0.07104544341564178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.790770173105371, + "compression_loss": 0.0, + "distillation_loss": 0.013350674882531166, + "epoch": 4.39, + "learning_rate": 1.536281874702063e-05, + "loss": 0.0173, + "step": 4618, + "task_loss": 0.05334080010652542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907774445022749, + "compression_loss": 0.0, + "distillation_loss": 0.03925604373216629, + "epoch": 4.39, + "learning_rate": 1.535298788714722e-05, + "loss": 0.0443, + "step": 4619, + "task_loss": 0.08980512619018555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907847120791649, + "compression_loss": 0.0, + "distillation_loss": 0.07377579063177109, + "epoch": 4.39, + "learning_rate": 1.5343158779700016e-05, + "loss": 0.0715, + "step": 4620, + "task_loss": 0.050635963678359985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907919758370445, + "compression_loss": 0.0, + "distillation_loss": 0.034185461699962616, + "epoch": 4.39, + "learning_rate": 1.533333142646453e-05, + "loss": 0.0327, + "step": 4621, + "task_loss": 0.019142286852002144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7907992357769174, + "compression_loss": 0.0, + "distillation_loss": 0.08377894014120102, + "epoch": 4.39, + "learning_rate": 1.5323505829225947e-05, + "loss": 0.0824, + "step": 4622, + "task_loss": 0.07016059011220932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908064918997872, + "compression_loss": 0.0, + "distillation_loss": 0.13712866604328156, + "epoch": 4.39, + "learning_rate": 1.5313681989769136e-05, + "loss": 0.1362, + "step": 4623, + "task_loss": 0.12798915803432465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908137442066578, + "compression_loss": 0.0, + "distillation_loss": 0.08357212692499161, + "epoch": 4.39, + "learning_rate": 1.530385990987863e-05, + "loss": 0.0788, + "step": 4624, + "task_loss": 0.035898420959711075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908209926985328, + "compression_loss": 0.0, + "distillation_loss": 0.025412458926439285, + "epoch": 4.39, + "learning_rate": 1.529403959133867e-05, + "loss": 0.0407, + "step": 4625, + "task_loss": 0.17801359295845032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908282373764158, + "compression_loss": 0.0, + "distillation_loss": 0.020880520343780518, + "epoch": 4.39, + "learning_rate": 1.5284221035933166e-05, + "loss": 0.0194, + "step": 4626, + "task_loss": 0.006218817085027695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908354782413105, + "compression_loss": 0.0, + "distillation_loss": 0.09893855452537537, + "epoch": 4.39, + "learning_rate": 1.5274404245445704e-05, + "loss": 0.0946, + "step": 4627, + "task_loss": 0.05531448870897293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908427152942207, + "compression_loss": 0.0, + "distillation_loss": 0.13214893639087677, + "epoch": 4.4, + "learning_rate": 1.5264589221659553e-05, + "loss": 0.1336, + "step": 4628, + "task_loss": 0.14715173840522766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908499485361499, + "compression_loss": 0.0, + "distillation_loss": 0.11516699194908142, + "epoch": 4.4, + "learning_rate": 1.5254775966357653e-05, + "loss": 0.1095, + "step": 4629, + "task_loss": 0.058957893401384354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908571779681018, + "compression_loss": 0.0, + "distillation_loss": 0.08969461172819138, + "epoch": 4.4, + "learning_rate": 1.5244964481322637e-05, + "loss": 0.0989, + "step": 4630, + "task_loss": 0.18196004629135132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908644035910803, + "compression_loss": 0.0, + "distillation_loss": 0.1797245740890503, + "epoch": 4.4, + "learning_rate": 1.5235154768336795e-05, + "loss": 0.1714, + "step": 4631, + "task_loss": 0.09689280390739441 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908716254060889, + "compression_loss": 0.0, + "distillation_loss": 0.08713481575250626, + "epoch": 4.4, + "learning_rate": 1.5225346829182121e-05, + "loss": 0.0895, + "step": 4632, + "task_loss": 0.1106729805469513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908788434141313, + "compression_loss": 0.0, + "distillation_loss": 0.05905129015445709, + "epoch": 4.4, + "learning_rate": 1.5215540665640277e-05, + "loss": 0.0611, + "step": 4633, + "task_loss": 0.07935148477554321 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908860576162112, + "compression_loss": 0.0, + "distillation_loss": 0.08967553079128265, + "epoch": 4.4, + "learning_rate": 1.5205736279492574e-05, + "loss": 0.0875, + "step": 4634, + "task_loss": 0.0675487220287323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7908932680133324, + "compression_loss": 0.0, + "distillation_loss": 0.07612395286560059, + "epoch": 4.4, + "learning_rate": 1.5195933672520064e-05, + "loss": 0.0825, + "step": 4635, + "task_loss": 0.13981005549430847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909004746064983, + "compression_loss": 0.0, + "distillation_loss": 0.18954381346702576, + "epoch": 4.4, + "learning_rate": 1.5186132846503412e-05, + "loss": 0.1939, + "step": 4636, + "task_loss": 0.23316198587417603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909076773967129, + "compression_loss": 0.0, + "distillation_loss": 0.02852441743016243, + "epoch": 4.4, + "learning_rate": 1.5176333803222998e-05, + "loss": 0.0353, + "step": 4637, + "task_loss": 0.09584514796733856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909148763849796, + "compression_loss": 0.0, + "distillation_loss": 0.04975371062755585, + "epoch": 4.4, + "learning_rate": 1.5166536544458856e-05, + "loss": 0.0526, + "step": 4638, + "task_loss": 0.0777263194322586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909220715723022, + "compression_loss": 0.0, + "distillation_loss": 0.17351922392845154, + "epoch": 4.41, + "learning_rate": 1.515674107199071e-05, + "loss": 0.1641, + "step": 4639, + "task_loss": 0.07936342060565948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909292629596845, + "compression_loss": 0.0, + "distillation_loss": 0.028591414913535118, + "epoch": 4.41, + "learning_rate": 1.5146947387597956e-05, + "loss": 0.0428, + "step": 4640, + "task_loss": 0.17051829397678375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79093645054813, + "compression_loss": 0.0, + "distillation_loss": 0.03785558417439461, + "epoch": 4.41, + "learning_rate": 1.513715549305966e-05, + "loss": 0.0441, + "step": 4641, + "task_loss": 0.10044631361961365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909436343386426, + "compression_loss": 0.0, + "distillation_loss": 0.09519485384225845, + "epoch": 4.41, + "learning_rate": 1.512736539015457e-05, + "loss": 0.0916, + "step": 4642, + "task_loss": 0.05921884626150131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909508143322257, + "compression_loss": 0.0, + "distillation_loss": 0.034322589635849, + "epoch": 4.41, + "learning_rate": 1.5117577080661094e-05, + "loss": 0.0324, + "step": 4643, + "task_loss": 0.014751961454749107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909579905298831, + "compression_loss": 0.0, + "distillation_loss": 0.01889497973024845, + "epoch": 4.41, + "learning_rate": 1.5107790566357347e-05, + "loss": 0.0176, + "step": 4644, + "task_loss": 0.005802595987915993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909651629326186, + "compression_loss": 0.0, + "distillation_loss": 0.09566991776227951, + "epoch": 4.41, + "learning_rate": 1.509800584902108e-05, + "loss": 0.0992, + "step": 4645, + "task_loss": 0.13102050125598907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909723315414358, + "compression_loss": 0.0, + "distillation_loss": 0.06106077879667282, + "epoch": 4.41, + "learning_rate": 1.508822293042974e-05, + "loss": 0.0612, + "step": 4646, + "task_loss": 0.06258445233106613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909794963573382, + "compression_loss": 0.0, + "distillation_loss": 0.046314168721437454, + "epoch": 4.41, + "learning_rate": 1.5078441812360445e-05, + "loss": 0.0432, + "step": 4647, + "task_loss": 0.01564324088394642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909866573813299, + "compression_loss": 0.0, + "distillation_loss": 0.05536476522684097, + "epoch": 4.41, + "learning_rate": 1.5068662496589975e-05, + "loss": 0.0511, + "step": 4648, + "task_loss": 0.012430863454937935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7909938146144142, + "compression_loss": 0.0, + "distillation_loss": 0.08107525110244751, + "epoch": 4.42, + "learning_rate": 1.5058884984894788e-05, + "loss": 0.0793, + "step": 4649, + "task_loss": 0.06296955794095993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910009680575949, + "compression_loss": 0.0, + "distillation_loss": 0.04533292353153229, + "epoch": 4.42, + "learning_rate": 1.5049109279051026e-05, + "loss": 0.0599, + "step": 4650, + "task_loss": 0.1911742091178894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910081177118757, + "compression_loss": 0.0, + "distillation_loss": 0.05968482419848442, + "epoch": 4.42, + "learning_rate": 1.503933538083448e-05, + "loss": 0.0574, + "step": 4651, + "task_loss": 0.03725713863968849 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910152635782604, + "compression_loss": 0.0, + "distillation_loss": 0.04093274474143982, + "epoch": 4.42, + "learning_rate": 1.5029563292020637e-05, + "loss": 0.0498, + "step": 4652, + "task_loss": 0.12984903156757355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910224056577524, + "compression_loss": 0.0, + "distillation_loss": 0.0636286810040474, + "epoch": 4.42, + "learning_rate": 1.5019793014384643e-05, + "loss": 0.0631, + "step": 4653, + "task_loss": 0.05869031697511673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910295439513556, + "compression_loss": 0.0, + "distillation_loss": 0.11052249372005463, + "epoch": 4.42, + "learning_rate": 1.5010024549701312e-05, + "loss": 0.1109, + "step": 4654, + "task_loss": 0.11410997062921524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910366784600736, + "compression_loss": 0.0, + "distillation_loss": 0.06556328386068344, + "epoch": 4.42, + "learning_rate": 1.5000257899745134e-05, + "loss": 0.0636, + "step": 4655, + "task_loss": 0.04608140140771866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910438091849102, + "compression_loss": 0.0, + "distillation_loss": 0.027359087020158768, + "epoch": 4.42, + "learning_rate": 1.4990493066290265e-05, + "loss": 0.0253, + "step": 4656, + "task_loss": 0.0065147485584020615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910509361268689, + "compression_loss": 0.0, + "distillation_loss": 0.03795522451400757, + "epoch": 4.42, + "learning_rate": 1.4980730051110541e-05, + "loss": 0.0557, + "step": 4657, + "task_loss": 0.21516427397727966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910580592869536, + "compression_loss": 0.0, + "distillation_loss": 0.03814719617366791, + "epoch": 4.42, + "learning_rate": 1.4970968855979455e-05, + "loss": 0.046, + "step": 4658, + "task_loss": 0.11639110743999481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910651786661678, + "compression_loss": 0.0, + "distillation_loss": 0.06086570769548416, + "epoch": 4.42, + "learning_rate": 1.496120948267018e-05, + "loss": 0.0621, + "step": 4659, + "task_loss": 0.072751984000206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910722942655152, + "compression_loss": 0.0, + "distillation_loss": 0.02917177602648735, + "epoch": 4.43, + "learning_rate": 1.4951451932955534e-05, + "loss": 0.0277, + "step": 4660, + "task_loss": 0.014873206615447998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910794060859996, + "compression_loss": 0.0, + "distillation_loss": 0.05168045312166214, + "epoch": 4.43, + "learning_rate": 1.4941696208608056e-05, + "loss": 0.0492, + "step": 4661, + "task_loss": 0.027083786204457283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910865141286245, + "compression_loss": 0.0, + "distillation_loss": 0.09697671234607697, + "epoch": 4.43, + "learning_rate": 1.4931942311399896e-05, + "loss": 0.1167, + "step": 4662, + "task_loss": 0.29466700553894043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7910936183943937, + "compression_loss": 0.0, + "distillation_loss": 0.09114208817481995, + "epoch": 4.43, + "learning_rate": 1.4922190243102905e-05, + "loss": 0.0878, + "step": 4663, + "task_loss": 0.057473860681056976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911007188843109, + "compression_loss": 0.0, + "distillation_loss": 0.05134554207324982, + "epoch": 4.43, + "learning_rate": 1.4912440005488593e-05, + "loss": 0.0732, + "step": 4664, + "task_loss": 0.2698168158531189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911078155993797, + "compression_loss": 0.0, + "distillation_loss": 0.024719703942537308, + "epoch": 4.43, + "learning_rate": 1.4902691600328134e-05, + "loss": 0.023, + "step": 4665, + "task_loss": 0.0074704308062791824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911149085406038, + "compression_loss": 0.0, + "distillation_loss": 0.11209557205438614, + "epoch": 4.43, + "learning_rate": 1.489294502939238e-05, + "loss": 0.1162, + "step": 4666, + "task_loss": 0.15347597002983093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911219977089871, + "compression_loss": 0.0, + "distillation_loss": 0.08372636884450912, + "epoch": 4.43, + "learning_rate": 1.4883200294451832e-05, + "loss": 0.0796, + "step": 4667, + "task_loss": 0.04254281520843506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911290831055329, + "compression_loss": 0.0, + "distillation_loss": 0.045971132814884186, + "epoch": 4.43, + "learning_rate": 1.4873457397276675e-05, + "loss": 0.0433, + "step": 4668, + "task_loss": 0.0188460536301136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911361647312452, + "compression_loss": 0.0, + "distillation_loss": 0.012892801314592361, + "epoch": 4.43, + "learning_rate": 1.4863716339636746e-05, + "loss": 0.0171, + "step": 4669, + "task_loss": 0.05512375757098198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911432425871274, + "compression_loss": 0.0, + "distillation_loss": 0.05050332099199295, + "epoch": 4.43, + "learning_rate": 1.4853977123301565e-05, + "loss": 0.062, + "step": 4670, + "task_loss": 0.16584210097789764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911503166741836, + "compression_loss": 0.0, + "distillation_loss": 0.030305128544569016, + "epoch": 4.44, + "learning_rate": 1.4844239750040308e-05, + "loss": 0.0305, + "step": 4671, + "task_loss": 0.0323617160320282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791157386993417, + "compression_loss": 0.0, + "distillation_loss": 0.06064460799098015, + "epoch": 4.44, + "learning_rate": 1.483450422162181e-05, + "loss": 0.0614, + "step": 4672, + "task_loss": 0.06825106590986252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911644535458315, + "compression_loss": 0.0, + "distillation_loss": 0.040299929678440094, + "epoch": 4.44, + "learning_rate": 1.4824770539814575e-05, + "loss": 0.0375, + "step": 4673, + "task_loss": 0.012086659669876099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911715163324309, + "compression_loss": 0.0, + "distillation_loss": 0.05591391772031784, + "epoch": 4.44, + "learning_rate": 1.4815038706386777e-05, + "loss": 0.06, + "step": 4674, + "task_loss": 0.09698733687400818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911785753542188, + "compression_loss": 0.0, + "distillation_loss": 0.038244374096393585, + "epoch": 4.44, + "learning_rate": 1.4805308723106248e-05, + "loss": 0.0403, + "step": 4675, + "task_loss": 0.0591559074819088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911856306121987, + "compression_loss": 0.0, + "distillation_loss": 0.0506531223654747, + "epoch": 4.44, + "learning_rate": 1.4795580591740493e-05, + "loss": 0.0497, + "step": 4676, + "task_loss": 0.04083885997533798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911926821073746, + "compression_loss": 0.0, + "distillation_loss": 0.08757050335407257, + "epoch": 4.44, + "learning_rate": 1.4785854314056652e-05, + "loss": 0.0904, + "step": 4677, + "task_loss": 0.1154625192284584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7911997298407498, + "compression_loss": 0.0, + "distillation_loss": 0.10649511963129044, + "epoch": 4.44, + "learning_rate": 1.4776129891821583e-05, + "loss": 0.1165, + "step": 4678, + "task_loss": 0.2064911276102066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912067738133284, + "compression_loss": 0.0, + "distillation_loss": 0.06318517029285431, + "epoch": 4.44, + "learning_rate": 1.4766407326801751e-05, + "loss": 0.0593, + "step": 4679, + "task_loss": 0.02464883401989937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912138140261138, + "compression_loss": 0.0, + "distillation_loss": 0.020300816744565964, + "epoch": 4.44, + "learning_rate": 1.4756686620763322e-05, + "loss": 0.0197, + "step": 4680, + "task_loss": 0.013965649530291557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912208504801098, + "compression_loss": 0.0, + "distillation_loss": 0.01607617177069187, + "epoch": 4.45, + "learning_rate": 1.4746967775472093e-05, + "loss": 0.0226, + "step": 4681, + "task_loss": 0.08178968727588654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79122788317632, + "compression_loss": 0.0, + "distillation_loss": 0.09621655941009521, + "epoch": 4.45, + "learning_rate": 1.4737250792693546e-05, + "loss": 0.0925, + "step": 4682, + "task_loss": 0.05869336053729057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912349121157481, + "compression_loss": 0.0, + "distillation_loss": 0.02147580124437809, + "epoch": 4.45, + "learning_rate": 1.4727535674192825e-05, + "loss": 0.0198, + "step": 4683, + "task_loss": 0.004298372194170952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912419372993978, + "compression_loss": 0.0, + "distillation_loss": 0.041834309697151184, + "epoch": 4.45, + "learning_rate": 1.4717822421734718e-05, + "loss": 0.0438, + "step": 4684, + "task_loss": 0.06110651418566704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912489587282729, + "compression_loss": 0.0, + "distillation_loss": 0.045896559953689575, + "epoch": 4.45, + "learning_rate": 1.4708111037083683e-05, + "loss": 0.0442, + "step": 4685, + "task_loss": 0.029097534716129303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912559764033769, + "compression_loss": 0.0, + "distillation_loss": 0.030547471717000008, + "epoch": 4.45, + "learning_rate": 1.4698401522003843e-05, + "loss": 0.0281, + "step": 4686, + "task_loss": 0.006017416715621948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912629903257136, + "compression_loss": 0.0, + "distillation_loss": 0.022139865905046463, + "epoch": 4.45, + "learning_rate": 1.4688693878258991e-05, + "loss": 0.0209, + "step": 4687, + "task_loss": 0.009355850517749786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912700004962866, + "compression_loss": 0.0, + "distillation_loss": 0.029008762910962105, + "epoch": 4.45, + "learning_rate": 1.4678988107612546e-05, + "loss": 0.0366, + "step": 4688, + "task_loss": 0.10471072793006897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912770069160996, + "compression_loss": 0.0, + "distillation_loss": 0.019571123644709587, + "epoch": 4.45, + "learning_rate": 1.4669284211827622e-05, + "loss": 0.0302, + "step": 4689, + "task_loss": 0.12538588047027588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912840095861563, + "compression_loss": 0.0, + "distillation_loss": 0.08734140545129776, + "epoch": 4.45, + "learning_rate": 1.4659582192666977e-05, + "loss": 0.0883, + "step": 4690, + "task_loss": 0.09734951704740524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912910085074604, + "compression_loss": 0.0, + "distillation_loss": 0.021236347034573555, + "epoch": 4.45, + "learning_rate": 1.4649882051893022e-05, + "loss": 0.0276, + "step": 4691, + "task_loss": 0.08498524129390717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7912980036810155, + "compression_loss": 0.0, + "distillation_loss": 0.07887224853038788, + "epoch": 4.46, + "learning_rate": 1.464018379126784e-05, + "loss": 0.0804, + "step": 4692, + "task_loss": 0.09387266635894775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913049951078255, + "compression_loss": 0.0, + "distillation_loss": 0.06495728343725204, + "epoch": 4.46, + "learning_rate": 1.4630487412553168e-05, + "loss": 0.0835, + "step": 4693, + "task_loss": 0.2504326403141022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913119827888938, + "compression_loss": 0.0, + "distillation_loss": 0.0405736081302166, + "epoch": 4.46, + "learning_rate": 1.4620792917510395e-05, + "loss": 0.06, + "step": 4694, + "task_loss": 0.2343919277191162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913189667252243, + "compression_loss": 0.0, + "distillation_loss": 0.05520131438970566, + "epoch": 4.46, + "learning_rate": 1.4611100307900572e-05, + "loss": 0.0586, + "step": 4695, + "task_loss": 0.08893582224845886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913259469178205, + "compression_loss": 0.0, + "distillation_loss": 0.05112463980913162, + "epoch": 4.46, + "learning_rate": 1.4601409585484413e-05, + "loss": 0.0487, + "step": 4696, + "task_loss": 0.02662830613553524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913329233676862, + "compression_loss": 0.0, + "distillation_loss": 0.0339503176510334, + "epoch": 4.46, + "learning_rate": 1.4591720752022286e-05, + "loss": 0.0313, + "step": 4697, + "task_loss": 0.007592087611556053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913398960758251, + "compression_loss": 0.0, + "distillation_loss": 0.01650651916861534, + "epoch": 4.46, + "learning_rate": 1.4582033809274215e-05, + "loss": 0.0153, + "step": 4698, + "task_loss": 0.004532504826784134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913468650432408, + "compression_loss": 0.0, + "distillation_loss": 0.08176242560148239, + "epoch": 4.46, + "learning_rate": 1.4572348758999877e-05, + "loss": 0.0844, + "step": 4699, + "task_loss": 0.10765102505683899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913538302709371, + "compression_loss": 0.0, + "distillation_loss": 0.036356210708618164, + "epoch": 4.46, + "learning_rate": 1.4562665602958592e-05, + "loss": 0.0466, + "step": 4700, + "task_loss": 0.13854879140853882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913607917599175, + "compression_loss": 0.0, + "distillation_loss": 0.14445237815380096, + "epoch": 4.46, + "learning_rate": 1.4552984342909382e-05, + "loss": 0.1413, + "step": 4701, + "task_loss": 0.11329984664916992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913677495111858, + "compression_loss": 0.0, + "distillation_loss": 0.015349620953202248, + "epoch": 4.47, + "learning_rate": 1.4543304980610878e-05, + "loss": 0.0141, + "step": 4702, + "task_loss": 0.003350917249917984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913747035257457, + "compression_loss": 0.0, + "distillation_loss": 0.11373439431190491, + "epoch": 4.47, + "learning_rate": 1.4533627517821374e-05, + "loss": 0.127, + "step": 4703, + "task_loss": 0.24672245979309082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913816538046009, + "compression_loss": 0.0, + "distillation_loss": 0.09408354014158249, + "epoch": 4.47, + "learning_rate": 1.452395195629884e-05, + "loss": 0.0891, + "step": 4704, + "task_loss": 0.04441932588815689 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913886003487549, + "compression_loss": 0.0, + "distillation_loss": 0.09586971998214722, + "epoch": 4.47, + "learning_rate": 1.4514278297800893e-05, + "loss": 0.0924, + "step": 4705, + "task_loss": 0.06134894862771034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7913955431592116, + "compression_loss": 0.0, + "distillation_loss": 0.09412407875061035, + "epoch": 4.47, + "learning_rate": 1.4504606544084798e-05, + "loss": 0.0965, + "step": 4706, + "task_loss": 0.1174854040145874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914024822369746, + "compression_loss": 0.0, + "distillation_loss": 0.07334903627634048, + "epoch": 4.47, + "learning_rate": 1.4494936696907458e-05, + "loss": 0.0821, + "step": 4707, + "task_loss": 0.1608203798532486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914094175830476, + "compression_loss": 0.0, + "distillation_loss": 0.023582283407449722, + "epoch": 4.47, + "learning_rate": 1.4485268758025466e-05, + "loss": 0.0217, + "step": 4708, + "task_loss": 0.0046432409435510635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914163491984342, + "compression_loss": 0.0, + "distillation_loss": 0.06829246878623962, + "epoch": 4.47, + "learning_rate": 1.4475602729195048e-05, + "loss": 0.0707, + "step": 4709, + "task_loss": 0.09261967241764069 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914232770841382, + "compression_loss": 0.0, + "distillation_loss": 0.05515692010521889, + "epoch": 4.47, + "learning_rate": 1.446593861217207e-05, + "loss": 0.059, + "step": 4710, + "task_loss": 0.09347251802682877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914302012411631, + "compression_loss": 0.0, + "distillation_loss": 0.0656106099486351, + "epoch": 4.47, + "learning_rate": 1.4456276408712083e-05, + "loss": 0.0618, + "step": 4711, + "task_loss": 0.027708284556865692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914371216705128, + "compression_loss": 0.0, + "distillation_loss": 0.02283991128206253, + "epoch": 4.47, + "learning_rate": 1.4446616120570258e-05, + "loss": 0.0257, + "step": 4712, + "task_loss": 0.051784027367830276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914440383731909, + "compression_loss": 0.0, + "distillation_loss": 0.11995580047369003, + "epoch": 4.48, + "learning_rate": 1.443695774950145e-05, + "loss": 0.1159, + "step": 4713, + "task_loss": 0.07978808879852295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791450951350201, + "compression_loss": 0.0, + "distillation_loss": 0.16006718575954437, + "epoch": 4.48, + "learning_rate": 1.4427301297260129e-05, + "loss": 0.1553, + "step": 4714, + "task_loss": 0.1124170571565628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791457860602547, + "compression_loss": 0.0, + "distillation_loss": 0.09785357117652893, + "epoch": 4.48, + "learning_rate": 1.4417646765600457e-05, + "loss": 0.0959, + "step": 4715, + "task_loss": 0.07808717340230942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914647661312323, + "compression_loss": 0.0, + "distillation_loss": 0.06697249412536621, + "epoch": 4.48, + "learning_rate": 1.4407994156276212e-05, + "loss": 0.0734, + "step": 4716, + "task_loss": 0.131184920668602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914716679372608, + "compression_loss": 0.0, + "distillation_loss": 0.04526514932513237, + "epoch": 4.48, + "learning_rate": 1.4398343471040831e-05, + "loss": 0.0416, + "step": 4717, + "task_loss": 0.009023293852806091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791478566021636, + "compression_loss": 0.0, + "distillation_loss": 0.03617861866950989, + "epoch": 4.48, + "learning_rate": 1.438869471164743e-05, + "loss": 0.0367, + "step": 4718, + "task_loss": 0.04139818996191025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914854603853618, + "compression_loss": 0.0, + "distillation_loss": 0.013502972200512886, + "epoch": 4.48, + "learning_rate": 1.4379047879848736e-05, + "loss": 0.0128, + "step": 4719, + "task_loss": 0.006139175966382027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914923510294417, + "compression_loss": 0.0, + "distillation_loss": 0.03151702880859375, + "epoch": 4.48, + "learning_rate": 1.4369402977397148e-05, + "loss": 0.0295, + "step": 4720, + "task_loss": 0.011361634358763695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7914992379548794, + "compression_loss": 0.0, + "distillation_loss": 0.08465395122766495, + "epoch": 4.48, + "learning_rate": 1.4359760006044686e-05, + "loss": 0.0813, + "step": 4721, + "task_loss": 0.051406532526016235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915061211626787, + "compression_loss": 0.0, + "distillation_loss": 0.02832719124853611, + "epoch": 4.48, + "learning_rate": 1.435011896754308e-05, + "loss": 0.0412, + "step": 4722, + "task_loss": 0.15668340027332306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915130006538432, + "compression_loss": 0.0, + "distillation_loss": 0.05229802057147026, + "epoch": 4.49, + "learning_rate": 1.4340479863643658e-05, + "loss": 0.0571, + "step": 4723, + "task_loss": 0.10068703442811966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915198764293766, + "compression_loss": 0.0, + "distillation_loss": 0.019345447421073914, + "epoch": 4.49, + "learning_rate": 1.4330842696097393e-05, + "loss": 0.018, + "step": 4724, + "task_loss": 0.006194958463311195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915267484902825, + "compression_loss": 0.0, + "distillation_loss": 0.08192586153745651, + "epoch": 4.49, + "learning_rate": 1.4321207466654945e-05, + "loss": 0.0811, + "step": 4725, + "task_loss": 0.07349298149347305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915336168375647, + "compression_loss": 0.0, + "distillation_loss": 0.12662634253501892, + "epoch": 4.49, + "learning_rate": 1.4311574177066594e-05, + "loss": 0.1275, + "step": 4726, + "task_loss": 0.1349095106124878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915404814722269, + "compression_loss": 0.0, + "distillation_loss": 0.01997673325240612, + "epoch": 4.49, + "learning_rate": 1.4301942829082265e-05, + "loss": 0.0185, + "step": 4727, + "task_loss": 0.005590014159679413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915473423952726, + "compression_loss": 0.0, + "distillation_loss": 0.025746600702404976, + "epoch": 4.49, + "learning_rate": 1.4292313424451536e-05, + "loss": 0.024, + "step": 4728, + "task_loss": 0.007853610441088676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915541996077057, + "compression_loss": 0.0, + "distillation_loss": 0.03533206880092621, + "epoch": 4.49, + "learning_rate": 1.4282685964923642e-05, + "loss": 0.0372, + "step": 4729, + "task_loss": 0.05367380380630493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915610531105297, + "compression_loss": 0.0, + "distillation_loss": 0.06636801362037659, + "epoch": 4.49, + "learning_rate": 1.427306045224747e-05, + "loss": 0.0671, + "step": 4730, + "task_loss": 0.07363536953926086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915679029047484, + "compression_loss": 0.0, + "distillation_loss": 0.15005621314048767, + "epoch": 4.49, + "learning_rate": 1.4263436888171516e-05, + "loss": 0.1441, + "step": 4731, + "task_loss": 0.0903296247124672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915747489913655, + "compression_loss": 0.0, + "distillation_loss": 0.14973436295986176, + "epoch": 4.49, + "learning_rate": 1.4253815274443965e-05, + "loss": 0.1512, + "step": 4732, + "task_loss": 0.16403284668922424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915815913713846, + "compression_loss": 0.0, + "distillation_loss": 0.04621317982673645, + "epoch": 4.49, + "learning_rate": 1.424419561281263e-05, + "loss": 0.0627, + "step": 4733, + "task_loss": 0.21088391542434692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915884300458095, + "compression_loss": 0.0, + "distillation_loss": 0.021405549719929695, + "epoch": 4.5, + "learning_rate": 1.423457790502496e-05, + "loss": 0.0199, + "step": 4734, + "task_loss": 0.0061973873525857925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7915952650156436, + "compression_loss": 0.0, + "distillation_loss": 0.07480795681476593, + "epoch": 4.5, + "learning_rate": 1.4224962152828054e-05, + "loss": 0.0678, + "step": 4735, + "task_loss": 0.004553038626909256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916020962818909, + "compression_loss": 0.0, + "distillation_loss": 0.023463796824216843, + "epoch": 4.5, + "learning_rate": 1.4215348357968669e-05, + "loss": 0.0456, + "step": 4736, + "task_loss": 0.24516035616397858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916089238455549, + "compression_loss": 0.0, + "distillation_loss": 0.019841229543089867, + "epoch": 4.5, + "learning_rate": 1.4205736522193197e-05, + "loss": 0.0224, + "step": 4737, + "task_loss": 0.0451948307454586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916157477076394, + "compression_loss": 0.0, + "distillation_loss": 0.11894679069519043, + "epoch": 4.5, + "learning_rate": 1.4196126647247654e-05, + "loss": 0.1265, + "step": 4738, + "task_loss": 0.19399774074554443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916225678691481, + "compression_loss": 0.0, + "distillation_loss": 0.023783180862665176, + "epoch": 4.5, + "learning_rate": 1.4186518734877757e-05, + "loss": 0.0354, + "step": 4739, + "task_loss": 0.13973920047283173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916293843310844, + "compression_loss": 0.0, + "distillation_loss": 0.04689887911081314, + "epoch": 4.5, + "learning_rate": 1.4176912786828808e-05, + "loss": 0.0544, + "step": 4740, + "task_loss": 0.12143304944038391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916361970944523, + "compression_loss": 0.0, + "distillation_loss": 0.02539118006825447, + "epoch": 4.5, + "learning_rate": 1.4167308804845774e-05, + "loss": 0.0285, + "step": 4741, + "task_loss": 0.05643775314092636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916430061602554, + "compression_loss": 0.0, + "distillation_loss": 0.04983173683285713, + "epoch": 4.5, + "learning_rate": 1.4157706790673262e-05, + "loss": 0.0469, + "step": 4742, + "task_loss": 0.020453961566090584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916498115294973, + "compression_loss": 0.0, + "distillation_loss": 0.023538459092378616, + "epoch": 4.5, + "learning_rate": 1.4148106746055535e-05, + "loss": 0.0218, + "step": 4743, + "task_loss": 0.0058768633753061295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916566132031818, + "compression_loss": 0.0, + "distillation_loss": 0.07629226893186569, + "epoch": 4.51, + "learning_rate": 1.4138508672736483e-05, + "loss": 0.0728, + "step": 4744, + "task_loss": 0.04148370400071144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916634111823124, + "compression_loss": 0.0, + "distillation_loss": 0.049477558583021164, + "epoch": 4.51, + "learning_rate": 1.4128912572459629e-05, + "loss": 0.0464, + "step": 4745, + "task_loss": 0.018751783296465874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791670205467893, + "compression_loss": 0.0, + "distillation_loss": 0.1168399229645729, + "epoch": 4.51, + "learning_rate": 1.4119318446968171e-05, + "loss": 0.1262, + "step": 4746, + "task_loss": 0.21064327657222748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916769960609271, + "compression_loss": 0.0, + "distillation_loss": 0.03802032768726349, + "epoch": 4.51, + "learning_rate": 1.4109726298004911e-05, + "loss": 0.0437, + "step": 4747, + "task_loss": 0.09514382481575012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916837829624186, + "compression_loss": 0.0, + "distillation_loss": 0.05071219056844711, + "epoch": 4.51, + "learning_rate": 1.4100136127312324e-05, + "loss": 0.0538, + "step": 4748, + "task_loss": 0.0818718820810318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916905661733709, + "compression_loss": 0.0, + "distillation_loss": 0.05683235824108124, + "epoch": 4.51, + "learning_rate": 1.4090547936632494e-05, + "loss": 0.0543, + "step": 4749, + "task_loss": 0.03133031725883484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7916973456947879, + "compression_loss": 0.0, + "distillation_loss": 0.02370285615324974, + "epoch": 4.51, + "learning_rate": 1.4080961727707184e-05, + "loss": 0.0217, + "step": 4750, + "task_loss": 0.0038769226521253586 + }, + { + "epoch": 4.51, + "eval_accuracy": 0.8922018348623854, + "eval_loss": 0.4650850296020508, + "eval_runtime": 18.1377, + "eval_samples_per_second": 48.077, + "eval_steps_per_second": 6.01, + "step": 4750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917041215276731, + "compression_loss": 0.0, + "distillation_loss": 0.02110632136464119, + "epoch": 4.51, + "learning_rate": 1.4071377502277764e-05, + "loss": 0.0196, + "step": 4751, + "task_loss": 0.005888473242521286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917108936730304, + "compression_loss": 0.0, + "distillation_loss": 0.04167748987674713, + "epoch": 4.51, + "learning_rate": 1.4061795262085243e-05, + "loss": 0.0383, + "step": 4752, + "task_loss": 0.007969718426465988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917176621318633, + "compression_loss": 0.0, + "distillation_loss": 0.03811153396964073, + "epoch": 4.51, + "learning_rate": 1.4052215008870299e-05, + "loss": 0.0526, + "step": 4753, + "task_loss": 0.1829942911863327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917244269051756, + "compression_loss": 0.0, + "distillation_loss": 0.08471325784921646, + "epoch": 4.51, + "learning_rate": 1.4042636744373225e-05, + "loss": 0.0854, + "step": 4754, + "task_loss": 0.09121614694595337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791731187993971, + "compression_loss": 0.0, + "distillation_loss": 0.08335433900356293, + "epoch": 4.52, + "learning_rate": 1.4033060470333948e-05, + "loss": 0.0847, + "step": 4755, + "task_loss": 0.09658505022525787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791737945399253, + "compression_loss": 0.0, + "distillation_loss": 0.09752035140991211, + "epoch": 4.52, + "learning_rate": 1.4023486188492052e-05, + "loss": 0.0917, + "step": 4756, + "task_loss": 0.03966911882162094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917446991220255, + "compression_loss": 0.0, + "distillation_loss": 0.05041617155075073, + "epoch": 4.52, + "learning_rate": 1.4013913900586767e-05, + "loss": 0.0554, + "step": 4757, + "task_loss": 0.10072137415409088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791751449163292, + "compression_loss": 0.0, + "distillation_loss": 0.03461472690105438, + "epoch": 4.52, + "learning_rate": 1.4004343608356928e-05, + "loss": 0.0325, + "step": 4758, + "task_loss": 0.013332528993487358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917581955240564, + "compression_loss": 0.0, + "distillation_loss": 0.11117596924304962, + "epoch": 4.52, + "learning_rate": 1.399477531354102e-05, + "loss": 0.1109, + "step": 4759, + "task_loss": 0.10825280100107193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917649382053221, + "compression_loss": 0.0, + "distillation_loss": 0.12476976960897446, + "epoch": 4.52, + "learning_rate": 1.3985209017877189e-05, + "loss": 0.1206, + "step": 4760, + "task_loss": 0.08343881368637085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917716772080929, + "compression_loss": 0.0, + "distillation_loss": 0.10303467512130737, + "epoch": 4.52, + "learning_rate": 1.3975644723103185e-05, + "loss": 0.1027, + "step": 4761, + "task_loss": 0.09963302314281464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917784125333727, + "compression_loss": 0.0, + "distillation_loss": 0.10136422514915466, + "epoch": 4.52, + "learning_rate": 1.39660824309564e-05, + "loss": 0.0952, + "step": 4762, + "task_loss": 0.03952625393867493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917851441821648, + "compression_loss": 0.0, + "distillation_loss": 0.04904685169458389, + "epoch": 4.52, + "learning_rate": 1.3956522143173894e-05, + "loss": 0.0537, + "step": 4763, + "task_loss": 0.09588026255369186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917918721554732, + "compression_loss": 0.0, + "distillation_loss": 0.04798861965537071, + "epoch": 4.52, + "learning_rate": 1.3946963861492319e-05, + "loss": 0.0518, + "step": 4764, + "task_loss": 0.08626765012741089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7917985964543014, + "compression_loss": 0.0, + "distillation_loss": 0.015419387258589268, + "epoch": 4.53, + "learning_rate": 1.3937407587648e-05, + "loss": 0.0148, + "step": 4765, + "task_loss": 0.009636864066123962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918053170796532, + "compression_loss": 0.0, + "distillation_loss": 0.09505701810121536, + "epoch": 4.53, + "learning_rate": 1.3927853323376855e-05, + "loss": 0.0958, + "step": 4766, + "task_loss": 0.10282032191753387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918120340325321, + "compression_loss": 0.0, + "distillation_loss": 0.07270844280719757, + "epoch": 4.53, + "learning_rate": 1.391830107041449e-05, + "loss": 0.0779, + "step": 4767, + "task_loss": 0.12441182881593704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791818747313942, + "compression_loss": 0.0, + "distillation_loss": 0.1424996554851532, + "epoch": 4.53, + "learning_rate": 1.390875083049611e-05, + "loss": 0.1441, + "step": 4768, + "task_loss": 0.15887659788131714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918254569248865, + "compression_loss": 0.0, + "distillation_loss": 0.020403150469064713, + "epoch": 4.53, + "learning_rate": 1.3899202605356542e-05, + "loss": 0.0188, + "step": 4769, + "task_loss": 0.00390862300992012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918321628663691, + "compression_loss": 0.0, + "distillation_loss": 0.19000548124313354, + "epoch": 4.53, + "learning_rate": 1.38896563967303e-05, + "loss": 0.1851, + "step": 4770, + "task_loss": 0.14091481268405914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918388651393938, + "compression_loss": 0.0, + "distillation_loss": 0.06574898958206177, + "epoch": 4.53, + "learning_rate": 1.3880112206351475e-05, + "loss": 0.0741, + "step": 4771, + "task_loss": 0.14917248487472534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918455637449642, + "compression_loss": 0.0, + "distillation_loss": 0.21178287267684937, + "epoch": 4.53, + "learning_rate": 1.3870570035953811e-05, + "loss": 0.2009, + "step": 4772, + "task_loss": 0.10322417318820953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918522586840838, + "compression_loss": 0.0, + "distillation_loss": 0.03703705966472626, + "epoch": 4.53, + "learning_rate": 1.3861029887270705e-05, + "loss": 0.0342, + "step": 4773, + "task_loss": 0.008660474792122841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918589499577564, + "compression_loss": 0.0, + "distillation_loss": 0.06054367870092392, + "epoch": 4.53, + "learning_rate": 1.3851491762035173e-05, + "loss": 0.0626, + "step": 4774, + "task_loss": 0.08063255250453949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918656375669858, + "compression_loss": 0.0, + "distillation_loss": 0.06310294568538666, + "epoch": 4.53, + "learning_rate": 1.3841955661979856e-05, + "loss": 0.0585, + "step": 4775, + "task_loss": 0.017228560522198677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918723215127754, + "compression_loss": 0.0, + "distillation_loss": 0.05638519302010536, + "epoch": 4.54, + "learning_rate": 1.383242158883702e-05, + "loss": 0.0603, + "step": 4776, + "task_loss": 0.09584720432758331 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918790017961291, + "compression_loss": 0.0, + "distillation_loss": 0.07800333946943283, + "epoch": 4.54, + "learning_rate": 1.3822889544338596e-05, + "loss": 0.0848, + "step": 4777, + "task_loss": 0.14598438143730164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918856784180506, + "compression_loss": 0.0, + "distillation_loss": 0.03396276757121086, + "epoch": 4.54, + "learning_rate": 1.3813359530216113e-05, + "loss": 0.0467, + "step": 4778, + "task_loss": 0.1610591858625412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918923513795434, + "compression_loss": 0.0, + "distillation_loss": 0.04296828433871269, + "epoch": 4.54, + "learning_rate": 1.3803831548200741e-05, + "loss": 0.0402, + "step": 4779, + "task_loss": 0.014961333945393562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7918990206816113, + "compression_loss": 0.0, + "distillation_loss": 0.0331374853849411, + "epoch": 4.54, + "learning_rate": 1.3794305600023296e-05, + "loss": 0.0358, + "step": 4780, + "task_loss": 0.05958189442753792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791905686325258, + "compression_loss": 0.0, + "distillation_loss": 0.017804235219955444, + "epoch": 4.54, + "learning_rate": 1.37847816874142e-05, + "loss": 0.0163, + "step": 4781, + "task_loss": 0.0025765616446733475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919123483114872, + "compression_loss": 0.0, + "distillation_loss": 0.03256119042634964, + "epoch": 4.54, + "learning_rate": 1.377525981210353e-05, + "loss": 0.0302, + "step": 4782, + "task_loss": 0.008887385949492455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919190066413024, + "compression_loss": 0.0, + "distillation_loss": 0.020645175129175186, + "epoch": 4.54, + "learning_rate": 1.3765739975820962e-05, + "loss": 0.0188, + "step": 4783, + "task_loss": 0.0026280879974365234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919256613157075, + "compression_loss": 0.0, + "distillation_loss": 0.036273565143346786, + "epoch": 4.54, + "learning_rate": 1.3756222180295848e-05, + "loss": 0.0377, + "step": 4784, + "task_loss": 0.05044960230588913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919323123357062, + "compression_loss": 0.0, + "distillation_loss": 0.027985138818621635, + "epoch": 4.54, + "learning_rate": 1.3746706427257122e-05, + "loss": 0.0387, + "step": 4785, + "task_loss": 0.13473522663116455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.791938959702302, + "compression_loss": 0.0, + "distillation_loss": 0.0791940987110138, + "epoch": 4.55, + "learning_rate": 1.3737192718433362e-05, + "loss": 0.0858, + "step": 4786, + "task_loss": 0.1456226408481598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919456034164986, + "compression_loss": 0.0, + "distillation_loss": 0.03578998148441315, + "epoch": 4.55, + "learning_rate": 1.3727681055552797e-05, + "loss": 0.0327, + "step": 4787, + "task_loss": 0.004547275602817535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919522434792998, + "compression_loss": 0.0, + "distillation_loss": 0.044103555381298065, + "epoch": 4.55, + "learning_rate": 1.3718171440343253e-05, + "loss": 0.0404, + "step": 4788, + "task_loss": 0.006830913946032524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919588798917092, + "compression_loss": 0.0, + "distillation_loss": 0.019968142732977867, + "epoch": 4.55, + "learning_rate": 1.3708663874532196e-05, + "loss": 0.0184, + "step": 4789, + "task_loss": 0.0039040017873048782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919655126547306, + "compression_loss": 0.0, + "distillation_loss": 0.04486335813999176, + "epoch": 4.55, + "learning_rate": 1.369915835984672e-05, + "loss": 0.0521, + "step": 4790, + "task_loss": 0.11712378263473511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919721417693675, + "compression_loss": 0.0, + "distillation_loss": 0.07928772270679474, + "epoch": 4.55, + "learning_rate": 1.3689654898013568e-05, + "loss": 0.0866, + "step": 4791, + "task_loss": 0.15211215615272522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919787672366236, + "compression_loss": 0.0, + "distillation_loss": 0.08449915051460266, + "epoch": 4.55, + "learning_rate": 1.3680153490759073e-05, + "loss": 0.0801, + "step": 4792, + "task_loss": 0.04078545793890953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919853890575028, + "compression_loss": 0.0, + "distillation_loss": 0.030160902068018913, + "epoch": 4.55, + "learning_rate": 1.3670654139809202e-05, + "loss": 0.0471, + "step": 4793, + "task_loss": 0.19971755146980286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919920072330086, + "compression_loss": 0.0, + "distillation_loss": 0.07142870128154755, + "epoch": 4.55, + "learning_rate": 1.3661156846889584e-05, + "loss": 0.067, + "step": 4794, + "task_loss": 0.027513636276125908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7919986217641447, + "compression_loss": 0.0, + "distillation_loss": 0.06955306231975555, + "epoch": 4.55, + "learning_rate": 1.3651661613725428e-05, + "loss": 0.0697, + "step": 4795, + "task_loss": 0.07086768746376038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920052326519148, + "compression_loss": 0.0, + "distillation_loss": 0.02464466169476509, + "epoch": 4.55, + "learning_rate": 1.3642168442041586e-05, + "loss": 0.028, + "step": 4796, + "task_loss": 0.05863853171467781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920118398973226, + "compression_loss": 0.0, + "distillation_loss": 0.04839901998639107, + "epoch": 4.56, + "learning_rate": 1.3632677333562557e-05, + "loss": 0.0461, + "step": 4797, + "task_loss": 0.0250839926302433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920184435013717, + "compression_loss": 0.0, + "distillation_loss": 0.07862424850463867, + "epoch": 4.56, + "learning_rate": 1.3623188290012434e-05, + "loss": 0.0786, + "step": 4798, + "task_loss": 0.0784490555524826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792025043465066, + "compression_loss": 0.0, + "distillation_loss": 0.1141967624425888, + "epoch": 4.56, + "learning_rate": 1.361370131311494e-05, + "loss": 0.1161, + "step": 4799, + "task_loss": 0.133285790681839 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920316397894089, + "compression_loss": 0.0, + "distillation_loss": 0.027825910598039627, + "epoch": 4.56, + "learning_rate": 1.3604216404593442e-05, + "loss": 0.0255, + "step": 4800, + "task_loss": 0.004920231178402901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920382324754043, + "compression_loss": 0.0, + "distillation_loss": 0.06682395190000534, + "epoch": 4.56, + "learning_rate": 1.3594733566170926e-05, + "loss": 0.0627, + "step": 4801, + "task_loss": 0.025574171915650368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920448215240558, + "compression_loss": 0.0, + "distillation_loss": 0.07818278670310974, + "epoch": 4.56, + "learning_rate": 1.3585252799569987e-05, + "loss": 0.0918, + "step": 4802, + "task_loss": 0.21402281522750854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792051406936367, + "compression_loss": 0.0, + "distillation_loss": 0.03720712661743164, + "epoch": 4.56, + "learning_rate": 1.357577410651284e-05, + "loss": 0.0382, + "step": 4803, + "task_loss": 0.047226957976818085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920579887133417, + "compression_loss": 0.0, + "distillation_loss": 0.069380983710289, + "epoch": 4.56, + "learning_rate": 1.3566297488721352e-05, + "loss": 0.0696, + "step": 4804, + "task_loss": 0.07113489508628845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920645668559835, + "compression_loss": 0.0, + "distillation_loss": 0.029062392190098763, + "epoch": 4.56, + "learning_rate": 1.3556822947916998e-05, + "loss": 0.0366, + "step": 4805, + "task_loss": 0.10417832434177399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920711413652961, + "compression_loss": 0.0, + "distillation_loss": 0.024266686290502548, + "epoch": 4.56, + "learning_rate": 1.354735048582086e-05, + "loss": 0.0301, + "step": 4806, + "task_loss": 0.08248142898082733 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920777122422833, + "compression_loss": 0.0, + "distillation_loss": 0.06912650167942047, + "epoch": 4.57, + "learning_rate": 1.3537880104153644e-05, + "loss": 0.0707, + "step": 4807, + "task_loss": 0.0849725604057312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920842794879486, + "compression_loss": 0.0, + "distillation_loss": 0.016859427094459534, + "epoch": 4.57, + "learning_rate": 1.3528411804635732e-05, + "loss": 0.0157, + "step": 4808, + "task_loss": 0.004863811656832695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920908431032958, + "compression_loss": 0.0, + "distillation_loss": 0.023333366960287094, + "epoch": 4.57, + "learning_rate": 1.3518945588987062e-05, + "loss": 0.0282, + "step": 4809, + "task_loss": 0.07221191376447678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7920974030893286, + "compression_loss": 0.0, + "distillation_loss": 0.0138099305331707, + "epoch": 4.57, + "learning_rate": 1.3509481458927209e-05, + "loss": 0.0365, + "step": 4810, + "task_loss": 0.24029265344142914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921039594470506, + "compression_loss": 0.0, + "distillation_loss": 0.0632215216755867, + "epoch": 4.57, + "learning_rate": 1.3500019416175396e-05, + "loss": 0.0725, + "step": 4811, + "task_loss": 0.15620213747024536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921105121774655, + "compression_loss": 0.0, + "distillation_loss": 0.10874255001544952, + "epoch": 4.57, + "learning_rate": 1.3490559462450445e-05, + "loss": 0.103, + "step": 4812, + "task_loss": 0.051314111799001694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792117061281577, + "compression_loss": 0.0, + "distillation_loss": 0.11337900161743164, + "epoch": 4.57, + "learning_rate": 1.3481101599470794e-05, + "loss": 0.106, + "step": 4813, + "task_loss": 0.03977866470813751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921236067603888, + "compression_loss": 0.0, + "distillation_loss": 0.14787593483924866, + "epoch": 4.57, + "learning_rate": 1.3471645828954504e-05, + "loss": 0.1417, + "step": 4814, + "task_loss": 0.0858084037899971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921301486149046, + "compression_loss": 0.0, + "distillation_loss": 0.07990266382694244, + "epoch": 4.57, + "learning_rate": 1.346219215261928e-05, + "loss": 0.0812, + "step": 4815, + "task_loss": 0.09328167140483856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921366868461279, + "compression_loss": 0.0, + "distillation_loss": 0.029033560305833817, + "epoch": 4.57, + "learning_rate": 1.345274057218241e-05, + "loss": 0.0336, + "step": 4816, + "task_loss": 0.07436929643154144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921432214550627, + "compression_loss": 0.0, + "distillation_loss": 0.0795164555311203, + "epoch": 4.57, + "learning_rate": 1.3443291089360827e-05, + "loss": 0.0783, + "step": 4817, + "task_loss": 0.06710808724164963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921497524427125, + "compression_loss": 0.0, + "distillation_loss": 0.1971079707145691, + "epoch": 4.58, + "learning_rate": 1.3433843705871086e-05, + "loss": 0.205, + "step": 4818, + "task_loss": 0.27602148056030273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921562798100809, + "compression_loss": 0.0, + "distillation_loss": 0.1079106554389, + "epoch": 4.58, + "learning_rate": 1.3424398423429335e-05, + "loss": 0.1086, + "step": 4819, + "task_loss": 0.11473225057125092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921628035581716, + "compression_loss": 0.0, + "distillation_loss": 0.06394623965024948, + "epoch": 4.58, + "learning_rate": 1.3414955243751362e-05, + "loss": 0.065, + "step": 4820, + "task_loss": 0.07409561425447464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921693236879885, + "compression_loss": 0.0, + "distillation_loss": 0.038549233227968216, + "epoch": 4.58, + "learning_rate": 1.3405514168552552e-05, + "loss": 0.0359, + "step": 4821, + "task_loss": 0.011564519256353378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921758402005351, + "compression_loss": 0.0, + "distillation_loss": 0.035132892429828644, + "epoch": 4.58, + "learning_rate": 1.3396075199547944e-05, + "loss": 0.0475, + "step": 4822, + "task_loss": 0.1591106653213501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921823530968151, + "compression_loss": 0.0, + "distillation_loss": 0.12006336450576782, + "epoch": 4.58, + "learning_rate": 1.3386638338452162e-05, + "loss": 0.1215, + "step": 4823, + "task_loss": 0.13471020758152008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921888623778321, + "compression_loss": 0.0, + "distillation_loss": 0.11836139857769012, + "epoch": 4.58, + "learning_rate": 1.3377203586979444e-05, + "loss": 0.1116, + "step": 4824, + "task_loss": 0.050610434263944626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7921953680445901, + "compression_loss": 0.0, + "distillation_loss": 0.03372761979699135, + "epoch": 4.58, + "learning_rate": 1.3367770946843671e-05, + "loss": 0.0362, + "step": 4825, + "task_loss": 0.057958073914051056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922018700980924, + "compression_loss": 0.0, + "distillation_loss": 0.018163051456212997, + "epoch": 4.58, + "learning_rate": 1.3358340419758339e-05, + "loss": 0.021, + "step": 4826, + "task_loss": 0.046153098344802856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792208368539343, + "compression_loss": 0.0, + "distillation_loss": 0.022957507520914078, + "epoch": 4.58, + "learning_rate": 1.3348912007436537e-05, + "loss": 0.0211, + "step": 4827, + "task_loss": 0.004677103832364082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922148633693453, + "compression_loss": 0.0, + "distillation_loss": 0.09547284990549088, + "epoch": 4.58, + "learning_rate": 1.3339485711590965e-05, + "loss": 0.0951, + "step": 4828, + "task_loss": 0.09193813055753708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922213545891031, + "compression_loss": 0.0, + "distillation_loss": 0.031036367639899254, + "epoch": 4.59, + "learning_rate": 1.3330061533933987e-05, + "loss": 0.0287, + "step": 4829, + "task_loss": 0.008153628557920456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922278421996202, + "compression_loss": 0.0, + "distillation_loss": 0.06869910657405853, + "epoch": 4.59, + "learning_rate": 1.3320639476177533e-05, + "loss": 0.0683, + "step": 4830, + "task_loss": 0.06460073590278625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922343262019, + "compression_loss": 0.0, + "distillation_loss": 0.02499844878911972, + "epoch": 4.59, + "learning_rate": 1.3311219540033156e-05, + "loss": 0.023, + "step": 4831, + "task_loss": 0.004916973412036896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922408065969465, + "compression_loss": 0.0, + "distillation_loss": 0.04321729391813278, + "epoch": 4.59, + "learning_rate": 1.3301801727212054e-05, + "loss": 0.0431, + "step": 4832, + "task_loss": 0.04201708361506462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922472833857632, + "compression_loss": 0.0, + "distillation_loss": 0.04636396840214729, + "epoch": 4.59, + "learning_rate": 1.3292386039424998e-05, + "loss": 0.0444, + "step": 4833, + "task_loss": 0.026649996638298035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922537565693538, + "compression_loss": 0.0, + "distillation_loss": 0.08710770308971405, + "epoch": 4.59, + "learning_rate": 1.328297247838241e-05, + "loss": 0.0833, + "step": 4834, + "task_loss": 0.04921592399477959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922602261487222, + "compression_loss": 0.0, + "distillation_loss": 0.05042930692434311, + "epoch": 4.59, + "learning_rate": 1.3273561045794294e-05, + "loss": 0.0674, + "step": 4835, + "task_loss": 0.21977365016937256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922666921248717, + "compression_loss": 0.0, + "distillation_loss": 0.13431815803050995, + "epoch": 4.59, + "learning_rate": 1.3264151743370299e-05, + "loss": 0.1476, + "step": 4836, + "task_loss": 0.2666419744491577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922731544988061, + "compression_loss": 0.0, + "distillation_loss": 0.10049565136432648, + "epoch": 4.59, + "learning_rate": 1.3254744572819658e-05, + "loss": 0.1039, + "step": 4837, + "task_loss": 0.13440871238708496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922796132715293, + "compression_loss": 0.0, + "distillation_loss": 0.07015521079301834, + "epoch": 4.59, + "learning_rate": 1.324533953585122e-05, + "loss": 0.0672, + "step": 4838, + "task_loss": 0.04096302390098572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922860684440447, + "compression_loss": 0.0, + "distillation_loss": 0.040450118482112885, + "epoch": 4.6, + "learning_rate": 1.323593663417348e-05, + "loss": 0.0478, + "step": 4839, + "task_loss": 0.11380739510059357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922925200173562, + "compression_loss": 0.0, + "distillation_loss": 0.02944648265838623, + "epoch": 4.6, + "learning_rate": 1.3226535869494505e-05, + "loss": 0.0349, + "step": 4840, + "task_loss": 0.08415282517671585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7922989679924674, + "compression_loss": 0.0, + "distillation_loss": 0.08658148348331451, + "epoch": 4.6, + "learning_rate": 1.3217137243521981e-05, + "loss": 0.09, + "step": 4841, + "task_loss": 0.12077930569648743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792305412370382, + "compression_loss": 0.0, + "distillation_loss": 0.05245388299226761, + "epoch": 4.6, + "learning_rate": 1.3207740757963225e-05, + "loss": 0.0538, + "step": 4842, + "task_loss": 0.06580542027950287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923118531521036, + "compression_loss": 0.0, + "distillation_loss": 0.06145579740405083, + "epoch": 4.6, + "learning_rate": 1.3198346414525162e-05, + "loss": 0.0614, + "step": 4843, + "task_loss": 0.06057172268629074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792318290338636, + "compression_loss": 0.0, + "distillation_loss": 0.01584722474217415, + "epoch": 4.6, + "learning_rate": 1.318895421491431e-05, + "loss": 0.0147, + "step": 4844, + "task_loss": 0.004009943455457687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923247239309827, + "compression_loss": 0.0, + "distillation_loss": 0.04391245171427727, + "epoch": 4.6, + "learning_rate": 1.3179564160836794e-05, + "loss": 0.0507, + "step": 4845, + "task_loss": 0.11139274388551712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923311539301476, + "compression_loss": 0.0, + "distillation_loss": 0.03385882452130318, + "epoch": 4.6, + "learning_rate": 1.317017625399839e-05, + "loss": 0.0316, + "step": 4846, + "task_loss": 0.011443352326750755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923375803371343, + "compression_loss": 0.0, + "distillation_loss": 0.029054662212729454, + "epoch": 4.6, + "learning_rate": 1.3160790496104441e-05, + "loss": 0.0302, + "step": 4847, + "task_loss": 0.040480926632881165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923440031529465, + "compression_loss": 0.0, + "distillation_loss": 0.10286715626716614, + "epoch": 4.6, + "learning_rate": 1.3151406888859907e-05, + "loss": 0.1156, + "step": 4848, + "task_loss": 0.2303019016981125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923504223785878, + "compression_loss": 0.0, + "distillation_loss": 0.08016088604927063, + "epoch": 4.6, + "learning_rate": 1.3142025433969384e-05, + "loss": 0.0915, + "step": 4849, + "task_loss": 0.19339843094348907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792356838015062, + "compression_loss": 0.0, + "distillation_loss": 0.03166133165359497, + "epoch": 4.61, + "learning_rate": 1.3132646133137053e-05, + "loss": 0.0361, + "step": 4850, + "task_loss": 0.07562405616044998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923632500633726, + "compression_loss": 0.0, + "distillation_loss": 0.026462262496352196, + "epoch": 4.61, + "learning_rate": 1.3123268988066695e-05, + "loss": 0.045, + "step": 4851, + "task_loss": 0.211809903383255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923696585245235, + "compression_loss": 0.0, + "distillation_loss": 0.02861833944916725, + "epoch": 4.61, + "learning_rate": 1.3113894000461721e-05, + "loss": 0.0266, + "step": 4852, + "task_loss": 0.007970165461301804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923760633995182, + "compression_loss": 0.0, + "distillation_loss": 0.07544773817062378, + "epoch": 4.61, + "learning_rate": 1.3104521172025158e-05, + "loss": 0.0724, + "step": 4853, + "task_loss": 0.04472661018371582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923824646893605, + "compression_loss": 0.0, + "distillation_loss": 0.06193629652261734, + "epoch": 4.61, + "learning_rate": 1.3095150504459614e-05, + "loss": 0.0598, + "step": 4854, + "task_loss": 0.04085727035999298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792388862395054, + "compression_loss": 0.0, + "distillation_loss": 0.03407544270157814, + "epoch": 4.61, + "learning_rate": 1.3085781999467303e-05, + "loss": 0.044, + "step": 4855, + "task_loss": 0.13290613889694214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7923952565176026, + "compression_loss": 0.0, + "distillation_loss": 0.15552052855491638, + "epoch": 4.61, + "learning_rate": 1.3076415658750083e-05, + "loss": 0.1622, + "step": 4856, + "task_loss": 0.2220618724822998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924016470580096, + "compression_loss": 0.0, + "distillation_loss": 0.0764533281326294, + "epoch": 4.61, + "learning_rate": 1.3067051484009383e-05, + "loss": 0.0794, + "step": 4857, + "task_loss": 0.10618744790554047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792408034017279, + "compression_loss": 0.0, + "distillation_loss": 0.08214154094457626, + "epoch": 4.61, + "learning_rate": 1.3057689476946238e-05, + "loss": 0.0788, + "step": 4858, + "task_loss": 0.0489237904548645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924144173964144, + "compression_loss": 0.0, + "distillation_loss": 0.18950366973876953, + "epoch": 4.61, + "learning_rate": 1.304832963926132e-05, + "loss": 0.1862, + "step": 4859, + "task_loss": 0.15664705634117126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924207971964193, + "compression_loss": 0.0, + "distillation_loss": 0.062167249619960785, + "epoch": 4.62, + "learning_rate": 1.303897197265489e-05, + "loss": 0.0726, + "step": 4860, + "task_loss": 0.16604240238666534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924271734182977, + "compression_loss": 0.0, + "distillation_loss": 0.02140945754945278, + "epoch": 4.62, + "learning_rate": 1.3029616478826805e-05, + "loss": 0.0202, + "step": 4861, + "task_loss": 0.009080074727535248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792433546063053, + "compression_loss": 0.0, + "distillation_loss": 0.020239079371094704, + "epoch": 4.62, + "learning_rate": 1.3020263159476526e-05, + "loss": 0.0187, + "step": 4862, + "task_loss": 0.004914524033665657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924399151316892, + "compression_loss": 0.0, + "distillation_loss": 0.03199296444654465, + "epoch": 4.62, + "learning_rate": 1.301091201630315e-05, + "loss": 0.0295, + "step": 4863, + "task_loss": 0.0070999301970005035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924462806252096, + "compression_loss": 0.0, + "distillation_loss": 0.05161367356777191, + "epoch": 4.62, + "learning_rate": 1.3001563051005347e-05, + "loss": 0.0562, + "step": 4864, + "task_loss": 0.09769769012928009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924526425446181, + "compression_loss": 0.0, + "distillation_loss": 0.015561849810183048, + "epoch": 4.62, + "learning_rate": 1.2992216265281393e-05, + "loss": 0.0211, + "step": 4865, + "task_loss": 0.07106789946556091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924590008909185, + "compression_loss": 0.0, + "distillation_loss": 0.03964412212371826, + "epoch": 4.62, + "learning_rate": 1.2982871660829191e-05, + "loss": 0.0413, + "step": 4866, + "task_loss": 0.05661403387784958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924653556651141, + "compression_loss": 0.0, + "distillation_loss": 0.030754052102565765, + "epoch": 4.62, + "learning_rate": 1.2973529239346227e-05, + "loss": 0.0281, + "step": 4867, + "task_loss": 0.004459032788872719 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924717068682089, + "compression_loss": 0.0, + "distillation_loss": 0.029556550085544586, + "epoch": 4.62, + "learning_rate": 1.2964189002529586e-05, + "loss": 0.0295, + "step": 4868, + "task_loss": 0.028598371893167496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924780545012066, + "compression_loss": 0.0, + "distillation_loss": 0.022967040538787842, + "epoch": 4.62, + "learning_rate": 1.2954850952075982e-05, + "loss": 0.0212, + "step": 4869, + "task_loss": 0.004923565313220024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924843985651107, + "compression_loss": 0.0, + "distillation_loss": 0.07921673357486725, + "epoch": 4.62, + "learning_rate": 1.294551508968172e-05, + "loss": 0.0833, + "step": 4870, + "task_loss": 0.11980371177196503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7924907390609249, + "compression_loss": 0.0, + "distillation_loss": 0.0501846969127655, + "epoch": 4.63, + "learning_rate": 1.2936181417042697e-05, + "loss": 0.0469, + "step": 4871, + "task_loss": 0.01730802096426487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792497075989653, + "compression_loss": 0.0, + "distillation_loss": 0.02974601648747921, + "epoch": 4.63, + "learning_rate": 1.2926849935854413e-05, + "loss": 0.0274, + "step": 4872, + "task_loss": 0.006090117618441582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925034093522986, + "compression_loss": 0.0, + "distillation_loss": 0.02171606570482254, + "epoch": 4.63, + "learning_rate": 1.2917520647811987e-05, + "loss": 0.0268, + "step": 4873, + "task_loss": 0.07253267616033554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925097391498654, + "compression_loss": 0.0, + "distillation_loss": 0.022985346615314484, + "epoch": 4.63, + "learning_rate": 1.2908193554610128e-05, + "loss": 0.0239, + "step": 4874, + "task_loss": 0.03176088258624077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925160653833572, + "compression_loss": 0.0, + "distillation_loss": 0.029063984751701355, + "epoch": 4.63, + "learning_rate": 1.2898868657943137e-05, + "loss": 0.0273, + "step": 4875, + "task_loss": 0.010947054252028465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925223880537774, + "compression_loss": 0.0, + "distillation_loss": 0.03057437390089035, + "epoch": 4.63, + "learning_rate": 1.2889545959504939e-05, + "loss": 0.0359, + "step": 4876, + "task_loss": 0.08393832296133041 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79252870716213, + "compression_loss": 0.0, + "distillation_loss": 0.0298734400421381, + "epoch": 4.63, + "learning_rate": 1.2880225460989038e-05, + "loss": 0.0332, + "step": 4877, + "task_loss": 0.0632658377289772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925350227094184, + "compression_loss": 0.0, + "distillation_loss": 0.0205811969935894, + "epoch": 4.63, + "learning_rate": 1.2870907164088557e-05, + "loss": 0.0191, + "step": 4878, + "task_loss": 0.005966978147625923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925413346966464, + "compression_loss": 0.0, + "distillation_loss": 0.016166334971785545, + "epoch": 4.63, + "learning_rate": 1.2861591070496193e-05, + "loss": 0.0274, + "step": 4879, + "task_loss": 0.1287321150302887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925476431248178, + "compression_loss": 0.0, + "distillation_loss": 0.03941744565963745, + "epoch": 4.63, + "learning_rate": 1.2852277181904282e-05, + "loss": 0.0516, + "step": 4880, + "task_loss": 0.16162440180778503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925539479949362, + "compression_loss": 0.0, + "distillation_loss": 0.06537621468305588, + "epoch": 4.64, + "learning_rate": 1.2842965500004728e-05, + "loss": 0.0704, + "step": 4881, + "task_loss": 0.11553017050027847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925602493080053, + "compression_loss": 0.0, + "distillation_loss": 0.036567624658346176, + "epoch": 4.64, + "learning_rate": 1.2833656026489028e-05, + "loss": 0.034, + "step": 4882, + "task_loss": 0.010592048987746239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925665470650286, + "compression_loss": 0.0, + "distillation_loss": 0.022438403218984604, + "epoch": 4.64, + "learning_rate": 1.282434876304831e-05, + "loss": 0.0208, + "step": 4883, + "task_loss": 0.006338924169540405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79257284126701, + "compression_loss": 0.0, + "distillation_loss": 0.07297282665967941, + "epoch": 4.64, + "learning_rate": 1.2815043711373285e-05, + "loss": 0.0853, + "step": 4884, + "task_loss": 0.19581279158592224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925791319149531, + "compression_loss": 0.0, + "distillation_loss": 0.036048613488674164, + "epoch": 4.64, + "learning_rate": 1.2805740873154237e-05, + "loss": 0.0621, + "step": 4885, + "task_loss": 0.29630377888679504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925854190098616, + "compression_loss": 0.0, + "distillation_loss": 0.055907703936100006, + "epoch": 4.64, + "learning_rate": 1.2796440250081093e-05, + "loss": 0.0643, + "step": 4886, + "task_loss": 0.13965827226638794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925917025527391, + "compression_loss": 0.0, + "distillation_loss": 0.04197829216718674, + "epoch": 4.64, + "learning_rate": 1.2787141843843359e-05, + "loss": 0.0383, + "step": 4887, + "task_loss": 0.0054167453199625015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7925979825445895, + "compression_loss": 0.0, + "distillation_loss": 0.029558390378952026, + "epoch": 4.64, + "learning_rate": 1.2777845656130122e-05, + "loss": 0.0272, + "step": 4888, + "task_loss": 0.006031878292560577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926042589864162, + "compression_loss": 0.0, + "distillation_loss": 0.04860628768801689, + "epoch": 4.64, + "learning_rate": 1.276855168863008e-05, + "loss": 0.0644, + "step": 4889, + "task_loss": 0.20697267353534698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792610531879223, + "compression_loss": 0.0, + "distillation_loss": 0.051894620060920715, + "epoch": 4.64, + "learning_rate": 1.2759259943031538e-05, + "loss": 0.057, + "step": 4890, + "task_loss": 0.10261577367782593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926168012240138, + "compression_loss": 0.0, + "distillation_loss": 0.027323635295033455, + "epoch": 4.64, + "learning_rate": 1.2749970421022381e-05, + "loss": 0.0323, + "step": 4891, + "task_loss": 0.07732345908880234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792623067021792, + "compression_loss": 0.0, + "distillation_loss": 0.05105939507484436, + "epoch": 4.65, + "learning_rate": 1.2740683124290081e-05, + "loss": 0.0573, + "step": 4892, + "task_loss": 0.11377457529306412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926293292735612, + "compression_loss": 0.0, + "distillation_loss": 0.014040287584066391, + "epoch": 4.65, + "learning_rate": 1.2731398054521748e-05, + "loss": 0.0218, + "step": 4893, + "task_loss": 0.09142255038022995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926355879803254, + "compression_loss": 0.0, + "distillation_loss": 0.0386216938495636, + "epoch": 4.65, + "learning_rate": 1.2722115213404031e-05, + "loss": 0.0425, + "step": 4894, + "task_loss": 0.07720839232206345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926418431430882, + "compression_loss": 0.0, + "distillation_loss": 0.06361688673496246, + "epoch": 4.65, + "learning_rate": 1.2712834602623228e-05, + "loss": 0.0605, + "step": 4895, + "task_loss": 0.03238803148269653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792648094762853, + "compression_loss": 0.0, + "distillation_loss": 0.02098456397652626, + "epoch": 4.65, + "learning_rate": 1.2703556223865181e-05, + "loss": 0.0331, + "step": 4896, + "task_loss": 0.14170801639556885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926543428406239, + "compression_loss": 0.0, + "distillation_loss": 0.06998811662197113, + "epoch": 4.65, + "learning_rate": 1.2694280078815382e-05, + "loss": 0.0669, + "step": 4897, + "task_loss": 0.03889083489775658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926605873774042, + "compression_loss": 0.0, + "distillation_loss": 0.03850919380784035, + "epoch": 4.65, + "learning_rate": 1.2685006169158869e-05, + "loss": 0.0413, + "step": 4898, + "task_loss": 0.06683476269245148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926668283741979, + "compression_loss": 0.0, + "distillation_loss": 0.15333296358585358, + "epoch": 4.65, + "learning_rate": 1.2675734496580285e-05, + "loss": 0.1567, + "step": 4899, + "task_loss": 0.18686996400356293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926730658320085, + "compression_loss": 0.0, + "distillation_loss": 0.12471291422843933, + "epoch": 4.65, + "learning_rate": 1.2666465062763894e-05, + "loss": 0.1294, + "step": 4900, + "task_loss": 0.17144428193569183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926792997518396, + "compression_loss": 0.0, + "distillation_loss": 0.11362001299858093, + "epoch": 4.65, + "learning_rate": 1.2657197869393523e-05, + "loss": 0.1113, + "step": 4901, + "task_loss": 0.09047464281320572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926855301346951, + "compression_loss": 0.0, + "distillation_loss": 0.029277116060256958, + "epoch": 4.66, + "learning_rate": 1.2647932918152606e-05, + "loss": 0.0279, + "step": 4902, + "task_loss": 0.015509987249970436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926917569815787, + "compression_loss": 0.0, + "distillation_loss": 0.0541316494345665, + "epoch": 4.66, + "learning_rate": 1.2638670210724138e-05, + "loss": 0.0605, + "step": 4903, + "task_loss": 0.11828672140836716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7926979802934938, + "compression_loss": 0.0, + "distillation_loss": 0.038249123841524124, + "epoch": 4.66, + "learning_rate": 1.2629409748790782e-05, + "loss": 0.0403, + "step": 4904, + "task_loss": 0.058266542851924896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927042000714443, + "compression_loss": 0.0, + "distillation_loss": 0.055571284145116806, + "epoch": 4.66, + "learning_rate": 1.262015153403472e-05, + "loss": 0.0609, + "step": 4905, + "task_loss": 0.10911326855421066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927104163164338, + "compression_loss": 0.0, + "distillation_loss": 0.0703677237033844, + "epoch": 4.66, + "learning_rate": 1.2610895568137754e-05, + "loss": 0.0799, + "step": 4906, + "task_loss": 0.1653900146484375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927166290294662, + "compression_loss": 0.0, + "distillation_loss": 0.04402827471494675, + "epoch": 4.66, + "learning_rate": 1.2601641852781265e-05, + "loss": 0.0499, + "step": 4907, + "task_loss": 0.1026904359459877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927228382115448, + "compression_loss": 0.0, + "distillation_loss": 0.0630364716053009, + "epoch": 4.66, + "learning_rate": 1.2592390389646258e-05, + "loss": 0.0653, + "step": 4908, + "task_loss": 0.08615773171186447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927290438636735, + "compression_loss": 0.0, + "distillation_loss": 0.1884252429008484, + "epoch": 4.66, + "learning_rate": 1.2583141180413288e-05, + "loss": 0.1818, + "step": 4909, + "task_loss": 0.12265457212924957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792735245986856, + "compression_loss": 0.0, + "distillation_loss": 0.030301768332719803, + "epoch": 4.66, + "learning_rate": 1.2573894226762518e-05, + "loss": 0.03, + "step": 4910, + "task_loss": 0.02761382982134819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927414445820959, + "compression_loss": 0.0, + "distillation_loss": 0.08405353128910065, + "epoch": 4.66, + "learning_rate": 1.25646495303737e-05, + "loss": 0.0817, + "step": 4911, + "task_loss": 0.06076852232217789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792747639650397, + "compression_loss": 0.0, + "distillation_loss": 0.02579943835735321, + "epoch": 4.66, + "learning_rate": 1.2555407092926197e-05, + "loss": 0.0374, + "step": 4912, + "task_loss": 0.14211821556091309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927538311927629, + "compression_loss": 0.0, + "distillation_loss": 0.07202491909265518, + "epoch": 4.67, + "learning_rate": 1.2546166916098928e-05, + "loss": 0.0744, + "step": 4913, + "task_loss": 0.09626448899507523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927600192101972, + "compression_loss": 0.0, + "distillation_loss": 0.04386778920888901, + "epoch": 4.67, + "learning_rate": 1.253692900157041e-05, + "loss": 0.0428, + "step": 4914, + "task_loss": 0.03336333483457565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927662037037038, + "compression_loss": 0.0, + "distillation_loss": 0.05689205601811409, + "epoch": 4.67, + "learning_rate": 1.252769335101877e-05, + "loss": 0.0683, + "step": 4915, + "task_loss": 0.17090968787670135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927723846742861, + "compression_loss": 0.0, + "distillation_loss": 0.1544632911682129, + "epoch": 4.67, + "learning_rate": 1.25184599661217e-05, + "loss": 0.1538, + "step": 4916, + "task_loss": 0.14771823585033417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927785621229481, + "compression_loss": 0.0, + "distillation_loss": 0.015177516266703606, + "epoch": 4.67, + "learning_rate": 1.2509228848556482e-05, + "loss": 0.0142, + "step": 4917, + "task_loss": 0.005492331460118294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927847360506932, + "compression_loss": 0.0, + "distillation_loss": 0.039150480180978775, + "epoch": 4.67, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.0361, + "step": 4918, + "task_loss": 0.00842754915356636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927909064585252, + "compression_loss": 0.0, + "distillation_loss": 0.024652540683746338, + "epoch": 4.67, + "learning_rate": 1.2490773422128732e-05, + "loss": 0.0228, + "step": 4919, + "task_loss": 0.005637306720018387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7927970733474479, + "compression_loss": 0.0, + "distillation_loss": 0.04846763238310814, + "epoch": 4.67, + "learning_rate": 1.2481549116618698e-05, + "loss": 0.0448, + "step": 4920, + "task_loss": 0.011590449139475822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928032367184648, + "compression_loss": 0.0, + "distillation_loss": 0.014604704454541206, + "epoch": 4.67, + "learning_rate": 1.247232708514556e-05, + "loss": 0.0136, + "step": 4921, + "task_loss": 0.004115687683224678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928093965725795, + "compression_loss": 0.0, + "distillation_loss": 0.027096299454569817, + "epoch": 4.67, + "learning_rate": 1.2463107329384552e-05, + "loss": 0.0252, + "step": 4922, + "task_loss": 0.008570542559027672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.792815552910796, + "compression_loss": 0.0, + "distillation_loss": 0.023527517914772034, + "epoch": 4.68, + "learning_rate": 1.2453889851010473e-05, + "loss": 0.0217, + "step": 4923, + "task_loss": 0.00551653653383255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928217057341178, + "compression_loss": 0.0, + "distillation_loss": 0.07601621747016907, + "epoch": 4.68, + "learning_rate": 1.2444674651697716e-05, + "loss": 0.0799, + "step": 4924, + "task_loss": 0.1144181340932846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928278550435486, + "compression_loss": 0.0, + "distillation_loss": 0.02135540172457695, + "epoch": 4.68, + "learning_rate": 1.2435461733120287e-05, + "loss": 0.0337, + "step": 4925, + "task_loss": 0.1447429358959198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928340008400919, + "compression_loss": 0.0, + "distillation_loss": 0.0660846009850502, + "epoch": 4.68, + "learning_rate": 1.2426251096951744e-05, + "loss": 0.064, + "step": 4926, + "task_loss": 0.04477040097117424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928401431247517, + "compression_loss": 0.0, + "distillation_loss": 0.037703558802604675, + "epoch": 4.68, + "learning_rate": 1.2417042744865237e-05, + "loss": 0.0531, + "step": 4927, + "task_loss": 0.19128406047821045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928462818985316, + "compression_loss": 0.0, + "distillation_loss": 0.024694714695215225, + "epoch": 4.68, + "learning_rate": 1.2407836678533523e-05, + "loss": 0.0321, + "step": 4928, + "task_loss": 0.09900853782892227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928524171624352, + "compression_loss": 0.0, + "distillation_loss": 0.05749480798840523, + "epoch": 4.68, + "learning_rate": 1.2398632899628912e-05, + "loss": 0.054, + "step": 4929, + "task_loss": 0.022426774725317955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928585489174661, + "compression_loss": 0.0, + "distillation_loss": 0.029665688052773476, + "epoch": 4.68, + "learning_rate": 1.2389431409823336e-05, + "loss": 0.0283, + "step": 4930, + "task_loss": 0.01619105413556099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928646771646282, + "compression_loss": 0.0, + "distillation_loss": 0.031163331121206284, + "epoch": 4.68, + "learning_rate": 1.2380232210788265e-05, + "loss": 0.0326, + "step": 4931, + "task_loss": 0.045856185257434845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928708019049249, + "compression_loss": 0.0, + "distillation_loss": 0.08584082871675491, + "epoch": 4.68, + "learning_rate": 1.23710353041948e-05, + "loss": 0.0879, + "step": 4932, + "task_loss": 0.10655449330806732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928769231393602, + "compression_loss": 0.0, + "distillation_loss": 0.11349561810493469, + "epoch": 4.68, + "learning_rate": 1.2361840691713595e-05, + "loss": 0.1096, + "step": 4933, + "task_loss": 0.07412466406822205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928830408689376, + "compression_loss": 0.0, + "distillation_loss": 0.0716467946767807, + "epoch": 4.69, + "learning_rate": 1.2352648375014883e-05, + "loss": 0.0791, + "step": 4934, + "task_loss": 0.14578455686569214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928891550946607, + "compression_loss": 0.0, + "distillation_loss": 0.03932729363441467, + "epoch": 4.69, + "learning_rate": 1.2343458355768513e-05, + "loss": 0.0449, + "step": 4935, + "task_loss": 0.09523576498031616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7928952658175334, + "compression_loss": 0.0, + "distillation_loss": 0.06750982254743576, + "epoch": 4.69, + "learning_rate": 1.233427063564389e-05, + "loss": 0.0789, + "step": 4936, + "task_loss": 0.18135161697864532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929013730385593, + "compression_loss": 0.0, + "distillation_loss": 0.05587318539619446, + "epoch": 4.69, + "learning_rate": 1.2325085216309994e-05, + "loss": 0.066, + "step": 4937, + "task_loss": 0.156914621591568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929074767587421, + "compression_loss": 0.0, + "distillation_loss": 0.044295355677604675, + "epoch": 4.69, + "learning_rate": 1.231590209943541e-05, + "loss": 0.0426, + "step": 4938, + "task_loss": 0.02747635915875435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929135769790853, + "compression_loss": 0.0, + "distillation_loss": 0.010653965175151825, + "epoch": 4.69, + "learning_rate": 1.230672128668831e-05, + "loss": 0.0163, + "step": 4939, + "task_loss": 0.06707858294248581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929196737005928, + "compression_loss": 0.0, + "distillation_loss": 0.10607406497001648, + "epoch": 4.69, + "learning_rate": 1.2297542779736417e-05, + "loss": 0.1061, + "step": 4940, + "task_loss": 0.10609476268291473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929257669242682, + "compression_loss": 0.0, + "distillation_loss": 0.025449592620134354, + "epoch": 4.69, + "learning_rate": 1.2288366580247047e-05, + "loss": 0.0324, + "step": 4941, + "task_loss": 0.09515959024429321 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929318566511152, + "compression_loss": 0.0, + "distillation_loss": 0.042621444910764694, + "epoch": 4.69, + "learning_rate": 1.2279192689887115e-05, + "loss": 0.0455, + "step": 4942, + "task_loss": 0.07145626842975616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929379428821375, + "compression_loss": 0.0, + "distillation_loss": 0.01993633061647415, + "epoch": 4.69, + "learning_rate": 1.2270021110323096e-05, + "loss": 0.0186, + "step": 4943, + "task_loss": 0.00634993240237236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929440256183387, + "compression_loss": 0.0, + "distillation_loss": 0.014678630977869034, + "epoch": 4.7, + "learning_rate": 1.2260851843221039e-05, + "loss": 0.0236, + "step": 4944, + "task_loss": 0.10428041964769363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929501048607225, + "compression_loss": 0.0, + "distillation_loss": 0.02224035933613777, + "epoch": 4.7, + "learning_rate": 1.225168489024661e-05, + "loss": 0.0263, + "step": 4945, + "task_loss": 0.0633140280842781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929561806102927, + "compression_loss": 0.0, + "distillation_loss": 0.0771671012043953, + "epoch": 4.7, + "learning_rate": 1.2242520253065004e-05, + "loss": 0.0854, + "step": 4946, + "task_loss": 0.15969133377075195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929622528680528, + "compression_loss": 0.0, + "distillation_loss": 0.012984257191419601, + "epoch": 4.7, + "learning_rate": 1.2233357933341047e-05, + "loss": 0.0249, + "step": 4947, + "task_loss": 0.13174743950366974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929683216350066, + "compression_loss": 0.0, + "distillation_loss": 0.013885335996747017, + "epoch": 4.7, + "learning_rate": 1.2224197932739096e-05, + "loss": 0.0128, + "step": 4948, + "task_loss": 0.0031508170068264008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929743869121578, + "compression_loss": 0.0, + "distillation_loss": 0.01262105256319046, + "epoch": 4.7, + "learning_rate": 1.2215040252923127e-05, + "loss": 0.021, + "step": 4949, + "task_loss": 0.09617089480161667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929804487005101, + "compression_loss": 0.0, + "distillation_loss": 0.07396592199802399, + "epoch": 4.7, + "learning_rate": 1.2205884895556672e-05, + "loss": 0.0722, + "step": 4950, + "task_loss": 0.05672897398471832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929865070010671, + "compression_loss": 0.0, + "distillation_loss": 0.019163163378834724, + "epoch": 4.7, + "learning_rate": 1.219673186230283e-05, + "loss": 0.018, + "step": 4951, + "task_loss": 0.007804272696375847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7929925618148325, + "compression_loss": 0.0, + "distillation_loss": 0.06364789605140686, + "epoch": 4.7, + "learning_rate": 1.2187581154824317e-05, + "loss": 0.0665, + "step": 4952, + "task_loss": 0.09249377250671387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79299861314281, + "compression_loss": 0.0, + "distillation_loss": 0.09652909636497498, + "epoch": 4.7, + "learning_rate": 1.2178432774783394e-05, + "loss": 0.0985, + "step": 4953, + "task_loss": 0.11579639464616776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930046609860032, + "compression_loss": 0.0, + "distillation_loss": 0.0369391068816185, + "epoch": 4.7, + "learning_rate": 1.2169286723841897e-05, + "loss": 0.0342, + "step": 4954, + "task_loss": 0.009912891313433647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930107053454158, + "compression_loss": 0.0, + "distillation_loss": 0.03373286873102188, + "epoch": 4.71, + "learning_rate": 1.216014300366126e-05, + "loss": 0.0389, + "step": 4955, + "task_loss": 0.08519351482391357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930167462220518, + "compression_loss": 0.0, + "distillation_loss": 0.024067046120762825, + "epoch": 4.71, + "learning_rate": 1.2151001615902493e-05, + "loss": 0.032, + "step": 4956, + "task_loss": 0.10371088981628418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930227836169144, + "compression_loss": 0.0, + "distillation_loss": 0.14827989041805267, + "epoch": 4.71, + "learning_rate": 1.2141862562226165e-05, + "loss": 0.1495, + "step": 4957, + "task_loss": 0.15999506413936615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930288175310075, + "compression_loss": 0.0, + "distillation_loss": 0.04894055798649788, + "epoch": 4.71, + "learning_rate": 1.2132725844292416e-05, + "loss": 0.0561, + "step": 4958, + "task_loss": 0.12008975446224213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930348479653349, + "compression_loss": 0.0, + "distillation_loss": 0.08717606216669083, + "epoch": 4.71, + "learning_rate": 1.2123591463760997e-05, + "loss": 0.0892, + "step": 4959, + "task_loss": 0.10745367407798767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930408749209001, + "compression_loss": 0.0, + "distillation_loss": 0.030964989215135574, + "epoch": 4.71, + "learning_rate": 1.2114459422291205e-05, + "loss": 0.0285, + "step": 4960, + "task_loss": 0.006043644621968269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930468983987069, + "compression_loss": 0.0, + "distillation_loss": 0.021385207772254944, + "epoch": 4.71, + "learning_rate": 1.2105329721541903e-05, + "loss": 0.03, + "step": 4961, + "task_loss": 0.10744792968034744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930529183997588, + "compression_loss": 0.0, + "distillation_loss": 0.1010364517569542, + "epoch": 4.71, + "learning_rate": 1.2096202363171571e-05, + "loss": 0.0969, + "step": 4962, + "task_loss": 0.05933345481753349 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930589349250597, + "compression_loss": 0.0, + "distillation_loss": 0.0767788365483284, + "epoch": 4.71, + "learning_rate": 1.2087077348838214e-05, + "loss": 0.0806, + "step": 4963, + "task_loss": 0.11461080610752106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930649479756132, + "compression_loss": 0.0, + "distillation_loss": 0.12463347613811493, + "epoch": 4.71, + "learning_rate": 1.2077954680199455e-05, + "loss": 0.1252, + "step": 4964, + "task_loss": 0.12991327047348022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793070957552423, + "compression_loss": 0.0, + "distillation_loss": 0.02803153544664383, + "epoch": 4.72, + "learning_rate": 1.2068834358912454e-05, + "loss": 0.0265, + "step": 4965, + "task_loss": 0.012602541595697403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930769636564927, + "compression_loss": 0.0, + "distillation_loss": 0.036852188408374786, + "epoch": 4.72, + "learning_rate": 1.2059716386633977e-05, + "loss": 0.0409, + "step": 4966, + "task_loss": 0.07686673104763031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930829662888261, + "compression_loss": 0.0, + "distillation_loss": 0.01830023154616356, + "epoch": 4.72, + "learning_rate": 1.205060076502034e-05, + "loss": 0.0173, + "step": 4967, + "task_loss": 0.00809447094798088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930889654504267, + "compression_loss": 0.0, + "distillation_loss": 0.22687265276908875, + "epoch": 4.72, + "learning_rate": 1.204148749572743e-05, + "loss": 0.2179, + "step": 4968, + "task_loss": 0.136878103017807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7930949611422985, + "compression_loss": 0.0, + "distillation_loss": 0.030822109431028366, + "epoch": 4.72, + "learning_rate": 1.2032376580410731e-05, + "loss": 0.0291, + "step": 4969, + "task_loss": 0.013280034065246582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931009533654448, + "compression_loss": 0.0, + "distillation_loss": 0.016060050576925278, + "epoch": 4.72, + "learning_rate": 1.2023268020725285e-05, + "loss": 0.0427, + "step": 4970, + "task_loss": 0.2822893261909485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931069421208696, + "compression_loss": 0.0, + "distillation_loss": 0.16523435711860657, + "epoch": 4.72, + "learning_rate": 1.2014161818325687e-05, + "loss": 0.1601, + "step": 4971, + "task_loss": 0.11406517028808594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931129274095764, + "compression_loss": 0.0, + "distillation_loss": 0.04404463618993759, + "epoch": 4.72, + "learning_rate": 1.2005057974866135e-05, + "loss": 0.047, + "step": 4972, + "task_loss": 0.07401682436466217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931189092325689, + "compression_loss": 0.0, + "distillation_loss": 0.03614489734172821, + "epoch": 4.72, + "learning_rate": 1.1995956492000397e-05, + "loss": 0.0331, + "step": 4973, + "task_loss": 0.005481433123350143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931248875908509, + "compression_loss": 0.0, + "distillation_loss": 0.048779651522636414, + "epoch": 4.72, + "learning_rate": 1.1986857371381788e-05, + "loss": 0.0466, + "step": 4974, + "task_loss": 0.027400122955441475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931308624854259, + "compression_loss": 0.0, + "distillation_loss": 0.03478461876511574, + "epoch": 4.72, + "learning_rate": 1.19777606146632e-05, + "loss": 0.0362, + "step": 4975, + "task_loss": 0.04934518039226532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931368339172977, + "compression_loss": 0.0, + "distillation_loss": 0.10953269898891449, + "epoch": 4.73, + "learning_rate": 1.1968666223497124e-05, + "loss": 0.1125, + "step": 4976, + "task_loss": 0.1394570916891098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79314280188747, + "compression_loss": 0.0, + "distillation_loss": 0.039646346122026443, + "epoch": 4.73, + "learning_rate": 1.1959574199535586e-05, + "loss": 0.0363, + "step": 4977, + "task_loss": 0.005907488986849785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931487663969464, + "compression_loss": 0.0, + "distillation_loss": 0.017676934599876404, + "epoch": 4.73, + "learning_rate": 1.1950484544430191e-05, + "loss": 0.0241, + "step": 4978, + "task_loss": 0.08169616758823395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931547274467307, + "compression_loss": 0.0, + "distillation_loss": 0.035570040345191956, + "epoch": 4.73, + "learning_rate": 1.1941397259832133e-05, + "loss": 0.0371, + "step": 4979, + "task_loss": 0.05074803903698921 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931606850378264, + "compression_loss": 0.0, + "distillation_loss": 0.058902185410261154, + "epoch": 4.73, + "learning_rate": 1.1932312347392154e-05, + "loss": 0.0549, + "step": 4980, + "task_loss": 0.019008172675967216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931666391712374, + "compression_loss": 0.0, + "distillation_loss": 0.014614107087254524, + "epoch": 4.73, + "learning_rate": 1.1923229808760564e-05, + "loss": 0.0136, + "step": 4981, + "task_loss": 0.0042775776237249374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931725898479672, + "compression_loss": 0.0, + "distillation_loss": 0.16022302210330963, + "epoch": 4.73, + "learning_rate": 1.1914149645587256e-05, + "loss": 0.1527, + "step": 4982, + "task_loss": 0.08545684814453125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931785370690195, + "compression_loss": 0.0, + "distillation_loss": 0.03780351206660271, + "epoch": 4.73, + "learning_rate": 1.1905071859521697e-05, + "loss": 0.0506, + "step": 4983, + "task_loss": 0.16610009968280792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931844808353982, + "compression_loss": 0.0, + "distillation_loss": 0.04032333940267563, + "epoch": 4.73, + "learning_rate": 1.1895996452212898e-05, + "loss": 0.0395, + "step": 4984, + "task_loss": 0.032581619918346405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931904211481067, + "compression_loss": 0.0, + "distillation_loss": 0.03943778946995735, + "epoch": 4.73, + "learning_rate": 1.1886923425309445e-05, + "loss": 0.0375, + "step": 4985, + "task_loss": 0.020389195531606674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7931963580081488, + "compression_loss": 0.0, + "distillation_loss": 0.0650925487279892, + "epoch": 4.74, + "learning_rate": 1.1877852780459518e-05, + "loss": 0.0637, + "step": 4986, + "task_loss": 0.05133982002735138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932022914165282, + "compression_loss": 0.0, + "distillation_loss": 0.023483579978346825, + "epoch": 4.74, + "learning_rate": 1.1868784519310826e-05, + "loss": 0.023, + "step": 4987, + "task_loss": 0.01817641593515873 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932082213742486, + "compression_loss": 0.0, + "distillation_loss": 0.038998499512672424, + "epoch": 4.74, + "learning_rate": 1.185971864351067e-05, + "loss": 0.0598, + "step": 4988, + "task_loss": 0.24691206216812134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932141478823136, + "compression_loss": 0.0, + "distillation_loss": 0.08431239426136017, + "epoch": 4.74, + "learning_rate": 1.1850655154705886e-05, + "loss": 0.078, + "step": 4989, + "task_loss": 0.020718101412057877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793220070941727, + "compression_loss": 0.0, + "distillation_loss": 0.01440352015197277, + "epoch": 4.74, + "learning_rate": 1.184159405454294e-05, + "loss": 0.0134, + "step": 4990, + "task_loss": 0.004175456240773201 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932259905534923, + "compression_loss": 0.0, + "distillation_loss": 0.039678964763879776, + "epoch": 4.74, + "learning_rate": 1.1832535344667806e-05, + "loss": 0.0511, + "step": 4991, + "task_loss": 0.1535629779100418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932319067186134, + "compression_loss": 0.0, + "distillation_loss": 0.187263622879982, + "epoch": 4.74, + "learning_rate": 1.1823479026726031e-05, + "loss": 0.1935, + "step": 4992, + "task_loss": 0.24967440962791443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932378194380938, + "compression_loss": 0.0, + "distillation_loss": 0.013006833381950855, + "epoch": 4.74, + "learning_rate": 1.1814425102362761e-05, + "loss": 0.0231, + "step": 4993, + "task_loss": 0.11390747874975204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932437287129374, + "compression_loss": 0.0, + "distillation_loss": 0.03500215336680412, + "epoch": 4.74, + "learning_rate": 1.1805373573222673e-05, + "loss": 0.0377, + "step": 4994, + "task_loss": 0.06191583350300789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932496345441475, + "compression_loss": 0.0, + "distillation_loss": 0.036350950598716736, + "epoch": 4.74, + "learning_rate": 1.1796324440950021e-05, + "loss": 0.0333, + "step": 4995, + "task_loss": 0.0061857327818870544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932555369327282, + "compression_loss": 0.0, + "distillation_loss": 0.053805820643901825, + "epoch": 4.74, + "learning_rate": 1.1787277707188616e-05, + "loss": 0.0542, + "step": 4996, + "task_loss": 0.058030955493450165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932614358796829, + "compression_loss": 0.0, + "distillation_loss": 0.12426917254924774, + "epoch": 4.75, + "learning_rate": 1.1778233373581857e-05, + "loss": 0.119, + "step": 4997, + "task_loss": 0.07191702723503113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932673313860155, + "compression_loss": 0.0, + "distillation_loss": 0.026698037981987, + "epoch": 4.75, + "learning_rate": 1.1769191441772672e-05, + "loss": 0.0249, + "step": 4998, + "task_loss": 0.008508618921041489 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932732234527294, + "compression_loss": 0.0, + "distillation_loss": 0.02993636205792427, + "epoch": 4.75, + "learning_rate": 1.1760151913403583e-05, + "loss": 0.0303, + "step": 4999, + "task_loss": 0.03406871110200882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932791120808286, + "compression_loss": 0.0, + "distillation_loss": 0.11071500182151794, + "epoch": 4.75, + "learning_rate": 1.1751114790116672e-05, + "loss": 0.1129, + "step": 5000, + "task_loss": 0.13242202997207642 + }, + { + "epoch": 4.75, + "eval_accuracy": 0.8967889908256881, + "eval_loss": 0.4209362268447876, + "eval_runtime": 17.9495, + "eval_samples_per_second": 48.581, + "eval_steps_per_second": 6.073, + "step": 5000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932849972713165, + "compression_loss": 0.0, + "distillation_loss": 0.022589676082134247, + "epoch": 4.75, + "learning_rate": 1.1742080073553565e-05, + "loss": 0.0305, + "step": 5001, + "task_loss": 0.10146450996398926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793290879025197, + "compression_loss": 0.0, + "distillation_loss": 0.13711608946323395, + "epoch": 4.75, + "learning_rate": 1.1733047765355466e-05, + "loss": 0.1323, + "step": 5002, + "task_loss": 0.08903352916240692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7932967573434737, + "compression_loss": 0.0, + "distillation_loss": 0.06641732901334763, + "epoch": 4.75, + "learning_rate": 1.1724017867163125e-05, + "loss": 0.0615, + "step": 5003, + "task_loss": 0.01706705428659916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933026322271504, + "compression_loss": 0.0, + "distillation_loss": 0.10634138435125351, + "epoch": 4.75, + "learning_rate": 1.1714990380616884e-05, + "loss": 0.1025, + "step": 5004, + "task_loss": 0.06803934276103973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933085036772304, + "compression_loss": 0.0, + "distillation_loss": 0.032394587993621826, + "epoch": 4.75, + "learning_rate": 1.1705965307356624e-05, + "loss": 0.0461, + "step": 5005, + "task_loss": 0.16980022192001343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933143716947177, + "compression_loss": 0.0, + "distillation_loss": 0.03210689127445221, + "epoch": 4.75, + "learning_rate": 1.169694264902178e-05, + "loss": 0.0516, + "step": 5006, + "task_loss": 0.22744594514369965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933202362806161, + "compression_loss": 0.0, + "distillation_loss": 0.05615242198109627, + "epoch": 4.75, + "learning_rate": 1.168792240725137e-05, + "loss": 0.0532, + "step": 5007, + "task_loss": 0.0271142590790987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793326097435929, + "compression_loss": 0.0, + "distillation_loss": 0.182695671916008, + "epoch": 4.76, + "learning_rate": 1.1678904583683979e-05, + "loss": 0.1831, + "step": 5008, + "task_loss": 0.18713723123073578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933319551616602, + "compression_loss": 0.0, + "distillation_loss": 0.07359650731086731, + "epoch": 4.76, + "learning_rate": 1.1669889179957725e-05, + "loss": 0.0844, + "step": 5009, + "task_loss": 0.1816483587026596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933378094588133, + "compression_loss": 0.0, + "distillation_loss": 0.11011994630098343, + "epoch": 4.76, + "learning_rate": 1.1660876197710288e-05, + "loss": 0.1121, + "step": 5010, + "task_loss": 0.13008421659469604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933436603283921, + "compression_loss": 0.0, + "distillation_loss": 0.02712424099445343, + "epoch": 4.76, + "learning_rate": 1.1651865638578944e-05, + "loss": 0.0252, + "step": 5011, + "task_loss": 0.008356472477316856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933495077714002, + "compression_loss": 0.0, + "distillation_loss": 0.058398209512233734, + "epoch": 4.76, + "learning_rate": 1.1642857504200491e-05, + "loss": 0.053, + "step": 5012, + "task_loss": 0.004138745367527008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933553517888414, + "compression_loss": 0.0, + "distillation_loss": 0.02487853169441223, + "epoch": 4.76, + "learning_rate": 1.1633851796211292e-05, + "loss": 0.0327, + "step": 5013, + "task_loss": 0.10304756462574005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933611923817193, + "compression_loss": 0.0, + "distillation_loss": 0.09818457067012787, + "epoch": 4.76, + "learning_rate": 1.1624848516247295e-05, + "loss": 0.0971, + "step": 5014, + "task_loss": 0.08704036474227905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933670295510376, + "compression_loss": 0.0, + "distillation_loss": 0.08643116801977158, + "epoch": 4.76, + "learning_rate": 1.1615847665943971e-05, + "loss": 0.0919, + "step": 5015, + "task_loss": 0.14145499467849731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933728632977998, + "compression_loss": 0.0, + "distillation_loss": 0.027642257511615753, + "epoch": 4.76, + "learning_rate": 1.1606849246936389e-05, + "loss": 0.0295, + "step": 5016, + "task_loss": 0.045945487916469574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933786936230098, + "compression_loss": 0.0, + "distillation_loss": 0.024577468633651733, + "epoch": 4.76, + "learning_rate": 1.1597853260859128e-05, + "loss": 0.026, + "step": 5017, + "task_loss": 0.039100244641304016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933845205276713, + "compression_loss": 0.0, + "distillation_loss": 0.0331391841173172, + "epoch": 4.77, + "learning_rate": 1.1588859709346384e-05, + "loss": 0.0595, + "step": 5018, + "task_loss": 0.29628539085388184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793390344012788, + "compression_loss": 0.0, + "distillation_loss": 0.03754423186182976, + "epoch": 4.77, + "learning_rate": 1.157986859403186e-05, + "loss": 0.0382, + "step": 5019, + "task_loss": 0.04380743205547333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7933961640793633, + "compression_loss": 0.0, + "distillation_loss": 0.11798399686813354, + "epoch": 4.77, + "learning_rate": 1.1570879916548827e-05, + "loss": 0.1147, + "step": 5020, + "task_loss": 0.0851517766714096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934019807284012, + "compression_loss": 0.0, + "distillation_loss": 0.034330565482378006, + "epoch": 4.77, + "learning_rate": 1.1561893678530141e-05, + "loss": 0.0472, + "step": 5021, + "task_loss": 0.16320303082466125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934077939609051, + "compression_loss": 0.0, + "distillation_loss": 0.04643068462610245, + "epoch": 4.77, + "learning_rate": 1.155290988160819e-05, + "loss": 0.0523, + "step": 5022, + "task_loss": 0.10536113381385803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793413603777879, + "compression_loss": 0.0, + "distillation_loss": 0.022104818373918533, + "epoch": 4.77, + "learning_rate": 1.154392852741491e-05, + "loss": 0.036, + "step": 5023, + "task_loss": 0.1612529158592224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934194101803264, + "compression_loss": 0.0, + "distillation_loss": 0.09352529048919678, + "epoch": 4.77, + "learning_rate": 1.153494961758182e-05, + "loss": 0.106, + "step": 5024, + "task_loss": 0.21867401897907257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934252131692509, + "compression_loss": 0.0, + "distillation_loss": 0.02358633652329445, + "epoch": 4.77, + "learning_rate": 1.1525973153739989e-05, + "loss": 0.0252, + "step": 5025, + "task_loss": 0.03958255797624588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934310127456563, + "compression_loss": 0.0, + "distillation_loss": 0.04170753434300423, + "epoch": 4.77, + "learning_rate": 1.1516999137520023e-05, + "loss": 0.0451, + "step": 5026, + "task_loss": 0.07600220292806625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934368089105464, + "compression_loss": 0.0, + "distillation_loss": 0.047912318259477615, + "epoch": 4.77, + "learning_rate": 1.1508027570552094e-05, + "loss": 0.0595, + "step": 5027, + "task_loss": 0.16383595764636993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934426016649245, + "compression_loss": 0.0, + "distillation_loss": 0.06721566617488861, + "epoch": 4.77, + "learning_rate": 1.1499058454465941e-05, + "loss": 0.0778, + "step": 5028, + "task_loss": 0.1731707900762558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934483910097948, + "compression_loss": 0.0, + "distillation_loss": 0.04140312969684601, + "epoch": 4.78, + "learning_rate": 1.1490091790890842e-05, + "loss": 0.0483, + "step": 5029, + "task_loss": 0.11057362705469131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934541769461606, + "compression_loss": 0.0, + "distillation_loss": 0.04200680926442146, + "epoch": 4.78, + "learning_rate": 1.1481127581455626e-05, + "loss": 0.0499, + "step": 5030, + "task_loss": 0.12135404348373413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934599594750257, + "compression_loss": 0.0, + "distillation_loss": 0.04865646734833717, + "epoch": 4.78, + "learning_rate": 1.1472165827788697e-05, + "loss": 0.0534, + "step": 5031, + "task_loss": 0.09640198945999146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934657385973938, + "compression_loss": 0.0, + "distillation_loss": 0.08559805154800415, + "epoch": 4.78, + "learning_rate": 1.1463206531518001e-05, + "loss": 0.0895, + "step": 5032, + "task_loss": 0.12415318191051483 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934715143142684, + "compression_loss": 0.0, + "distillation_loss": 0.04011882096529007, + "epoch": 4.78, + "learning_rate": 1.1454249694271021e-05, + "loss": 0.0579, + "step": 5033, + "task_loss": 0.21819335222244263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934772866266535, + "compression_loss": 0.0, + "distillation_loss": 0.08635386824607849, + "epoch": 4.78, + "learning_rate": 1.144529531767482e-05, + "loss": 0.09, + "step": 5034, + "task_loss": 0.12273044884204865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934830555355527, + "compression_loss": 0.0, + "distillation_loss": 0.02714727818965912, + "epoch": 4.78, + "learning_rate": 1.1436343403356017e-05, + "loss": 0.0251, + "step": 5035, + "task_loss": 0.006460065022110939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934888210419694, + "compression_loss": 0.0, + "distillation_loss": 0.080211341381073, + "epoch": 4.78, + "learning_rate": 1.1427393952940754e-05, + "loss": 0.0851, + "step": 5036, + "task_loss": 0.12943881750106812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7934945831469076, + "compression_loss": 0.0, + "distillation_loss": 0.04694737493991852, + "epoch": 4.78, + "learning_rate": 1.1418446968054741e-05, + "loss": 0.0637, + "step": 5037, + "task_loss": 0.21490800380706787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935003418513709, + "compression_loss": 0.0, + "distillation_loss": 0.1910262107849121, + "epoch": 4.78, + "learning_rate": 1.1409502450323254e-05, + "loss": 0.1973, + "step": 5038, + "task_loss": 0.25344181060791016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935060971563629, + "compression_loss": 0.0, + "distillation_loss": 0.08850134164094925, + "epoch": 4.79, + "learning_rate": 1.1400560401371097e-05, + "loss": 0.0971, + "step": 5039, + "task_loss": 0.17405250668525696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935118490628873, + "compression_loss": 0.0, + "distillation_loss": 0.023128552362322807, + "epoch": 4.79, + "learning_rate": 1.1391620822822629e-05, + "loss": 0.0234, + "step": 5040, + "task_loss": 0.025910664349794388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935175975719478, + "compression_loss": 0.0, + "distillation_loss": 0.02330312877893448, + "epoch": 4.79, + "learning_rate": 1.1382683716301781e-05, + "loss": 0.0262, + "step": 5041, + "task_loss": 0.0519491508603096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935233426845482, + "compression_loss": 0.0, + "distillation_loss": 0.029029665514826775, + "epoch": 4.79, + "learning_rate": 1.1373749083432025e-05, + "loss": 0.0293, + "step": 5042, + "task_loss": 0.03185213357210159 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793529084401692, + "compression_loss": 0.0, + "distillation_loss": 0.030532198026776314, + "epoch": 4.79, + "learning_rate": 1.1364816925836372e-05, + "loss": 0.0405, + "step": 5043, + "task_loss": 0.13049139082431793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793534822724383, + "compression_loss": 0.0, + "distillation_loss": 0.030790645629167557, + "epoch": 4.79, + "learning_rate": 1.1355887245137383e-05, + "loss": 0.0355, + "step": 5044, + "task_loss": 0.07748115062713623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935405576536249, + "compression_loss": 0.0, + "distillation_loss": 0.10416917502880096, + "epoch": 4.79, + "learning_rate": 1.1346960042957197e-05, + "loss": 0.1008, + "step": 5045, + "task_loss": 0.07013058662414551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935462891904211, + "compression_loss": 0.0, + "distillation_loss": 0.018903907388448715, + "epoch": 4.79, + "learning_rate": 1.1338035320917473e-05, + "loss": 0.0232, + "step": 5046, + "task_loss": 0.06144241988658905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935520173357757, + "compression_loss": 0.0, + "distillation_loss": 0.012658301740884781, + "epoch": 4.79, + "learning_rate": 1.1329113080639419e-05, + "loss": 0.029, + "step": 5047, + "task_loss": 0.17617405951023102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935577420906922, + "compression_loss": 0.0, + "distillation_loss": 0.025603320449590683, + "epoch": 4.79, + "learning_rate": 1.1320193323743824e-05, + "loss": 0.026, + "step": 5048, + "task_loss": 0.02996073290705681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935634634561742, + "compression_loss": 0.0, + "distillation_loss": 0.04193894937634468, + "epoch": 4.79, + "learning_rate": 1.1311276051850994e-05, + "loss": 0.0599, + "step": 5049, + "task_loss": 0.22172468900680542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935691814332255, + "compression_loss": 0.0, + "distillation_loss": 0.089395672082901, + "epoch": 4.8, + "learning_rate": 1.1302361266580786e-05, + "loss": 0.0867, + "step": 5050, + "task_loss": 0.0627622902393341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935748960228497, + "compression_loss": 0.0, + "distillation_loss": 0.03509131073951721, + "epoch": 4.8, + "learning_rate": 1.1293448969552623e-05, + "loss": 0.0442, + "step": 5051, + "task_loss": 0.12643598020076752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935806072260505, + "compression_loss": 0.0, + "distillation_loss": 0.035416148602962494, + "epoch": 4.8, + "learning_rate": 1.1284539162385474e-05, + "loss": 0.0432, + "step": 5052, + "task_loss": 0.1130717545747757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935863150438317, + "compression_loss": 0.0, + "distillation_loss": 0.038104936480522156, + "epoch": 4.8, + "learning_rate": 1.127563184669784e-05, + "loss": 0.0569, + "step": 5053, + "task_loss": 0.226011723279953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935920194771968, + "compression_loss": 0.0, + "distillation_loss": 0.07715682685375214, + "epoch": 4.8, + "learning_rate": 1.1266727024107771e-05, + "loss": 0.0907, + "step": 5054, + "task_loss": 0.2121979296207428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7935977205271496, + "compression_loss": 0.0, + "distillation_loss": 0.02819431945681572, + "epoch": 4.8, + "learning_rate": 1.1257824696232888e-05, + "loss": 0.0261, + "step": 5055, + "task_loss": 0.007308483123779297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936034181946937, + "compression_loss": 0.0, + "distillation_loss": 0.020847158506512642, + "epoch": 4.8, + "learning_rate": 1.124892486469033e-05, + "loss": 0.0193, + "step": 5056, + "task_loss": 0.005755210295319557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936091124808329, + "compression_loss": 0.0, + "distillation_loss": 0.03235051780939102, + "epoch": 4.8, + "learning_rate": 1.1240027531096786e-05, + "loss": 0.0299, + "step": 5057, + "task_loss": 0.007758015766739845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936148033865708, + "compression_loss": 0.0, + "distillation_loss": 0.19748008251190186, + "epoch": 4.8, + "learning_rate": 1.1231132697068523e-05, + "loss": 0.1925, + "step": 5058, + "task_loss": 0.14814594388008118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793620490912911, + "compression_loss": 0.0, + "distillation_loss": 0.013191369362175465, + "epoch": 4.8, + "learning_rate": 1.1222240364221303e-05, + "loss": 0.0211, + "step": 5059, + "task_loss": 0.09272238612174988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936261750608573, + "compression_loss": 0.0, + "distillation_loss": 0.05488280951976776, + "epoch": 4.81, + "learning_rate": 1.1213350534170488e-05, + "loss": 0.0512, + "step": 5060, + "task_loss": 0.018499650061130524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936318558314134, + "compression_loss": 0.0, + "distillation_loss": 0.025873934850096703, + "epoch": 4.81, + "learning_rate": 1.1204463208530936e-05, + "loss": 0.0244, + "step": 5061, + "task_loss": 0.01143595576286316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793637533225583, + "compression_loss": 0.0, + "distillation_loss": 0.05775732174515724, + "epoch": 4.81, + "learning_rate": 1.1195578388917092e-05, + "loss": 0.0579, + "step": 5062, + "task_loss": 0.05883469432592392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936432072443695, + "compression_loss": 0.0, + "distillation_loss": 0.058215852826833725, + "epoch": 4.81, + "learning_rate": 1.1186696076942916e-05, + "loss": 0.0542, + "step": 5063, + "task_loss": 0.018394893035292625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793648877888777, + "compression_loss": 0.0, + "distillation_loss": 0.07067988067865372, + "epoch": 4.81, + "learning_rate": 1.1177816274221911e-05, + "loss": 0.0808, + "step": 5064, + "task_loss": 0.17217287421226501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936545451598089, + "compression_loss": 0.0, + "distillation_loss": 0.03770400956273079, + "epoch": 4.81, + "learning_rate": 1.116893898236716e-05, + "loss": 0.0355, + "step": 5065, + "task_loss": 0.015920959413051605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793660209058469, + "compression_loss": 0.0, + "distillation_loss": 0.049285177141427994, + "epoch": 4.81, + "learning_rate": 1.1160064202991254e-05, + "loss": 0.0594, + "step": 5066, + "task_loss": 0.14998885989189148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793665869585761, + "compression_loss": 0.0, + "distillation_loss": 0.03805050998926163, + "epoch": 4.81, + "learning_rate": 1.115119193770633e-05, + "loss": 0.0408, + "step": 5067, + "task_loss": 0.06544838100671768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936715267426885, + "compression_loss": 0.0, + "distillation_loss": 0.021483074873685837, + "epoch": 4.81, + "learning_rate": 1.1142322188124088e-05, + "loss": 0.0442, + "step": 5068, + "task_loss": 0.24901482462882996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936771805302552, + "compression_loss": 0.0, + "distillation_loss": 0.11341410130262375, + "epoch": 4.81, + "learning_rate": 1.1133454955855768e-05, + "loss": 0.1099, + "step": 5069, + "task_loss": 0.0778903216123581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936828309494648, + "compression_loss": 0.0, + "distillation_loss": 0.1032809242606163, + "epoch": 4.81, + "learning_rate": 1.1124590242512137e-05, + "loss": 0.0993, + "step": 5070, + "task_loss": 0.06375472992658615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793688478001321, + "compression_loss": 0.0, + "distillation_loss": 0.04365667700767517, + "epoch": 4.82, + "learning_rate": 1.1115728049703503e-05, + "loss": 0.0504, + "step": 5071, + "task_loss": 0.11084698140621185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936941216868274, + "compression_loss": 0.0, + "distillation_loss": 0.015161161310970783, + "epoch": 4.82, + "learning_rate": 1.1106868379039747e-05, + "loss": 0.0141, + "step": 5072, + "task_loss": 0.004572039470076561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7936997620069878, + "compression_loss": 0.0, + "distillation_loss": 0.038352642208337784, + "epoch": 4.82, + "learning_rate": 1.109801123213026e-05, + "loss": 0.0374, + "step": 5073, + "task_loss": 0.02898401767015457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937053989628058, + "compression_loss": 0.0, + "distillation_loss": 0.07865383476018906, + "epoch": 4.82, + "learning_rate": 1.1089156610583985e-05, + "loss": 0.0769, + "step": 5074, + "task_loss": 0.061387475579977036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937110325552851, + "compression_loss": 0.0, + "distillation_loss": 0.024030856788158417, + "epoch": 4.82, + "learning_rate": 1.1080304516009399e-05, + "loss": 0.024, + "step": 5075, + "task_loss": 0.023738959804177284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937166627854294, + "compression_loss": 0.0, + "distillation_loss": 0.0827215313911438, + "epoch": 4.82, + "learning_rate": 1.1071454950014533e-05, + "loss": 0.1135, + "step": 5076, + "task_loss": 0.3905654847621918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937222896542424, + "compression_loss": 0.0, + "distillation_loss": 0.06419490277767181, + "epoch": 4.82, + "learning_rate": 1.1062607914206965e-05, + "loss": 0.071, + "step": 5077, + "task_loss": 0.13255758583545685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937279131627277, + "compression_loss": 0.0, + "distillation_loss": 0.044310204684734344, + "epoch": 4.82, + "learning_rate": 1.1053763410193788e-05, + "loss": 0.057, + "step": 5078, + "task_loss": 0.17076875269412994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793733533311889, + "compression_loss": 0.0, + "distillation_loss": 0.05891291797161102, + "epoch": 4.82, + "learning_rate": 1.104492143958166e-05, + "loss": 0.0659, + "step": 5079, + "task_loss": 0.1289033591747284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937391501027301, + "compression_loss": 0.0, + "distillation_loss": 0.05876559019088745, + "epoch": 4.82, + "learning_rate": 1.1036082003976759e-05, + "loss": 0.0594, + "step": 5080, + "task_loss": 0.06525548547506332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937447635362546, + "compression_loss": 0.0, + "distillation_loss": 0.1539030373096466, + "epoch": 4.83, + "learning_rate": 1.1027245104984816e-05, + "loss": 0.1464, + "step": 5081, + "task_loss": 0.07844631373882294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937503736134661, + "compression_loss": 0.0, + "distillation_loss": 0.12115428596735, + "epoch": 4.83, + "learning_rate": 1.1018410744211081e-05, + "loss": 0.1157, + "step": 5082, + "task_loss": 0.06699676811695099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937559803353684, + "compression_loss": 0.0, + "distillation_loss": 0.03977685421705246, + "epoch": 4.83, + "learning_rate": 1.100957892326038e-05, + "loss": 0.0385, + "step": 5083, + "task_loss": 0.027261190116405487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937615837029652, + "compression_loss": 0.0, + "distillation_loss": 0.1250099539756775, + "epoch": 4.83, + "learning_rate": 1.100074964373705e-05, + "loss": 0.1183, + "step": 5084, + "task_loss": 0.058270517736673355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937671837172601, + "compression_loss": 0.0, + "distillation_loss": 0.015824340283870697, + "epoch": 4.83, + "learning_rate": 1.0991922907244944e-05, + "loss": 0.0147, + "step": 5085, + "task_loss": 0.004903359338641167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937727803792568, + "compression_loss": 0.0, + "distillation_loss": 0.01482314057648182, + "epoch": 4.83, + "learning_rate": 1.0983098715387526e-05, + "loss": 0.0138, + "step": 5086, + "task_loss": 0.004439596086740494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793778373689959, + "compression_loss": 0.0, + "distillation_loss": 0.038871392607688904, + "epoch": 4.83, + "learning_rate": 1.0974277069767735e-05, + "loss": 0.044, + "step": 5087, + "task_loss": 0.09021726995706558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937839636503703, + "compression_loss": 0.0, + "distillation_loss": 0.0341789647936821, + "epoch": 4.83, + "learning_rate": 1.096545797198806e-05, + "loss": 0.045, + "step": 5088, + "task_loss": 0.14275771379470825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937895502614946, + "compression_loss": 0.0, + "distillation_loss": 0.02304195612668991, + "epoch": 4.83, + "learning_rate": 1.095664142365053e-05, + "loss": 0.0342, + "step": 5089, + "task_loss": 0.13422459363937378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7937951335243353, + "compression_loss": 0.0, + "distillation_loss": 0.027511442080140114, + "epoch": 4.83, + "learning_rate": 1.0947827426356725e-05, + "loss": 0.0257, + "step": 5090, + "task_loss": 0.009677274152636528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938007134398962, + "compression_loss": 0.0, + "distillation_loss": 0.039502017199993134, + "epoch": 4.83, + "learning_rate": 1.0939015981707746e-05, + "loss": 0.054, + "step": 5091, + "task_loss": 0.18460404872894287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938062900091811, + "compression_loss": 0.0, + "distillation_loss": 0.03704231232404709, + "epoch": 4.84, + "learning_rate": 1.093020709130422e-05, + "loss": 0.0372, + "step": 5092, + "task_loss": 0.038229990750551224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938118632331935, + "compression_loss": 0.0, + "distillation_loss": 0.05441391468048096, + "epoch": 4.84, + "learning_rate": 1.0921400756746338e-05, + "loss": 0.0582, + "step": 5093, + "task_loss": 0.09189236164093018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938174331129373, + "compression_loss": 0.0, + "distillation_loss": 0.1751764416694641, + "epoch": 4.84, + "learning_rate": 1.0912596979633824e-05, + "loss": 0.173, + "step": 5094, + "task_loss": 0.15349790453910828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938229996494159, + "compression_loss": 0.0, + "distillation_loss": 0.03634584695100784, + "epoch": 4.84, + "learning_rate": 1.0903795761565914e-05, + "loss": 0.0338, + "step": 5095, + "task_loss": 0.010769536718726158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938285628436332, + "compression_loss": 0.0, + "distillation_loss": 0.06358032673597336, + "epoch": 4.84, + "learning_rate": 1.0894997104141378e-05, + "loss": 0.0612, + "step": 5096, + "task_loss": 0.03997098281979561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938341226965929, + "compression_loss": 0.0, + "distillation_loss": 0.06305442750453949, + "epoch": 4.84, + "learning_rate": 1.088620100895856e-05, + "loss": 0.0676, + "step": 5097, + "task_loss": 0.10872484743595123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938396792092985, + "compression_loss": 0.0, + "distillation_loss": 0.02636984921991825, + "epoch": 4.84, + "learning_rate": 1.0877407477615304e-05, + "loss": 0.025, + "step": 5098, + "task_loss": 0.013119250535964966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938452323827537, + "compression_loss": 0.0, + "distillation_loss": 0.04463554173707962, + "epoch": 4.84, + "learning_rate": 1.0868616511708982e-05, + "loss": 0.0485, + "step": 5099, + "task_loss": 0.0837230458855629 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938507822179625, + "compression_loss": 0.0, + "distillation_loss": 0.045190710574388504, + "epoch": 4.84, + "learning_rate": 1.085982811283654e-05, + "loss": 0.0637, + "step": 5100, + "task_loss": 0.23051756620407104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938563287159282, + "compression_loss": 0.0, + "distillation_loss": 0.05444833263754845, + "epoch": 4.84, + "learning_rate": 1.085104228259442e-05, + "loss": 0.0613, + "step": 5101, + "task_loss": 0.12304200232028961 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938618718776546, + "compression_loss": 0.0, + "distillation_loss": 0.03785646706819534, + "epoch": 4.85, + "learning_rate": 1.0842259022578602e-05, + "loss": 0.0397, + "step": 5102, + "task_loss": 0.05668614059686661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938674117041453, + "compression_loss": 0.0, + "distillation_loss": 0.04682941734790802, + "epoch": 4.85, + "learning_rate": 1.0833478334384617e-05, + "loss": 0.0546, + "step": 5103, + "task_loss": 0.12421214580535889 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938729481964042, + "compression_loss": 0.0, + "distillation_loss": 0.09900999814271927, + "epoch": 4.85, + "learning_rate": 1.082470021960753e-05, + "loss": 0.0984, + "step": 5104, + "task_loss": 0.09260520339012146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938784813554349, + "compression_loss": 0.0, + "distillation_loss": 0.021003127098083496, + "epoch": 4.85, + "learning_rate": 1.0815924679841916e-05, + "loss": 0.0387, + "step": 5105, + "task_loss": 0.19845835864543915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938840111822411, + "compression_loss": 0.0, + "distillation_loss": 0.038200899958610535, + "epoch": 4.85, + "learning_rate": 1.0807151716681885e-05, + "loss": 0.0421, + "step": 5106, + "task_loss": 0.0776185691356659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938895376778262, + "compression_loss": 0.0, + "distillation_loss": 0.016974829137325287, + "epoch": 4.85, + "learning_rate": 1.0798381331721109e-05, + "loss": 0.03, + "step": 5107, + "task_loss": 0.14757771790027618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7938950608431943, + "compression_loss": 0.0, + "distillation_loss": 0.038715705275535583, + "epoch": 4.85, + "learning_rate": 1.0789613526552758e-05, + "loss": 0.0371, + "step": 5108, + "task_loss": 0.02297482080757618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939005806793489, + "compression_loss": 0.0, + "distillation_loss": 0.029004612937569618, + "epoch": 4.85, + "learning_rate": 1.0780848302769542e-05, + "loss": 0.036, + "step": 5109, + "task_loss": 0.09945765137672424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939060971872937, + "compression_loss": 0.0, + "distillation_loss": 0.018345588818192482, + "epoch": 4.85, + "learning_rate": 1.0772085661963708e-05, + "loss": 0.0224, + "step": 5110, + "task_loss": 0.05867563933134079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939116103680323, + "compression_loss": 0.0, + "distillation_loss": 0.026313165202736855, + "epoch": 4.85, + "learning_rate": 1.0763325605727048e-05, + "loss": 0.024, + "step": 5111, + "task_loss": 0.003129279240965843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939171202225684, + "compression_loss": 0.0, + "distillation_loss": 0.0785897821187973, + "epoch": 4.85, + "learning_rate": 1.0754568135650856e-05, + "loss": 0.0924, + "step": 5112, + "task_loss": 0.2167954295873642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939226267519057, + "compression_loss": 0.0, + "distillation_loss": 0.028556518256664276, + "epoch": 4.86, + "learning_rate": 1.0745813253325957e-05, + "loss": 0.0266, + "step": 5113, + "task_loss": 0.0085418950766325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939281299570481, + "compression_loss": 0.0, + "distillation_loss": 0.020152829587459564, + "epoch": 4.86, + "learning_rate": 1.073706096034274e-05, + "loss": 0.0343, + "step": 5114, + "task_loss": 0.161162868142128 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793933629838999, + "compression_loss": 0.0, + "distillation_loss": 0.03302818909287453, + "epoch": 4.86, + "learning_rate": 1.072831125829109e-05, + "loss": 0.0397, + "step": 5115, + "task_loss": 0.09927168488502502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939391263987621, + "compression_loss": 0.0, + "distillation_loss": 0.027194611728191376, + "epoch": 4.86, + "learning_rate": 1.0719564148760427e-05, + "loss": 0.0256, + "step": 5116, + "task_loss": 0.011119823902845383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939446196373412, + "compression_loss": 0.0, + "distillation_loss": 0.03556310385465622, + "epoch": 4.86, + "learning_rate": 1.0710819633339719e-05, + "loss": 0.0339, + "step": 5117, + "task_loss": 0.018816489726305008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939501095557399, + "compression_loss": 0.0, + "distillation_loss": 0.3605664372444153, + "epoch": 4.86, + "learning_rate": 1.0702077713617442e-05, + "loss": 0.3543, + "step": 5118, + "task_loss": 0.2975212633609772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939555961549619, + "compression_loss": 0.0, + "distillation_loss": 0.02403501234948635, + "epoch": 4.86, + "learning_rate": 1.0693338391181598e-05, + "loss": 0.0224, + "step": 5119, + "task_loss": 0.007670925930142403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793961079436011, + "compression_loss": 0.0, + "distillation_loss": 0.13072296977043152, + "epoch": 4.86, + "learning_rate": 1.0684601667619736e-05, + "loss": 0.1322, + "step": 5120, + "task_loss": 0.1455729752779007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939665593998908, + "compression_loss": 0.0, + "distillation_loss": 0.03021019697189331, + "epoch": 4.86, + "learning_rate": 1.0675867544518935e-05, + "loss": 0.0285, + "step": 5121, + "task_loss": 0.01261189952492714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939720360476049, + "compression_loss": 0.0, + "distillation_loss": 0.09756823629140854, + "epoch": 4.86, + "learning_rate": 1.0667136023465782e-05, + "loss": 0.0941, + "step": 5122, + "task_loss": 0.06310638785362244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.793977509380157, + "compression_loss": 0.0, + "distillation_loss": 0.020023830235004425, + "epoch": 4.87, + "learning_rate": 1.0658407106046389e-05, + "loss": 0.0209, + "step": 5123, + "task_loss": 0.029044657945632935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939829793985509, + "compression_loss": 0.0, + "distillation_loss": 0.06327167898416519, + "epoch": 4.87, + "learning_rate": 1.0649680793846423e-05, + "loss": 0.0785, + "step": 5124, + "task_loss": 0.21557585895061493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939884461037902, + "compression_loss": 0.0, + "distillation_loss": 0.0167409535497427, + "epoch": 4.87, + "learning_rate": 1.0640957088451054e-05, + "loss": 0.0207, + "step": 5125, + "task_loss": 0.056279633194208145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939939094968785, + "compression_loss": 0.0, + "distillation_loss": 0.03627139329910278, + "epoch": 4.87, + "learning_rate": 1.0632235991444972e-05, + "loss": 0.0342, + "step": 5126, + "task_loss": 0.01587931625545025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7939993695788197, + "compression_loss": 0.0, + "distillation_loss": 0.023063641041517258, + "epoch": 4.87, + "learning_rate": 1.0623517504412429e-05, + "loss": 0.0296, + "step": 5127, + "task_loss": 0.08890461921691895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940048263506173, + "compression_loss": 0.0, + "distillation_loss": 0.021907242015004158, + "epoch": 4.87, + "learning_rate": 1.061480162893716e-05, + "loss": 0.0297, + "step": 5128, + "task_loss": 0.09995594620704651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940102798132751, + "compression_loss": 0.0, + "distillation_loss": 0.05496376007795334, + "epoch": 4.87, + "learning_rate": 1.0606088366602462e-05, + "loss": 0.0606, + "step": 5129, + "task_loss": 0.11096987873315811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940157299677967, + "compression_loss": 0.0, + "distillation_loss": 0.04084121435880661, + "epoch": 4.87, + "learning_rate": 1.0597377718991127e-05, + "loss": 0.0394, + "step": 5130, + "task_loss": 0.02599833346903324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940211768151857, + "compression_loss": 0.0, + "distillation_loss": 0.08682038635015488, + "epoch": 4.87, + "learning_rate": 1.0588669687685502e-05, + "loss": 0.0918, + "step": 5131, + "task_loss": 0.13667771220207214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794026620356446, + "compression_loss": 0.0, + "distillation_loss": 0.056324686855077744, + "epoch": 4.87, + "learning_rate": 1.0579964274267434e-05, + "loss": 0.0604, + "step": 5132, + "task_loss": 0.0968530923128128 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940320605925811, + "compression_loss": 0.0, + "distillation_loss": 0.022295914590358734, + "epoch": 4.87, + "learning_rate": 1.0571261480318295e-05, + "loss": 0.0266, + "step": 5133, + "task_loss": 0.0648619681596756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940374975245948, + "compression_loss": 0.0, + "distillation_loss": 0.019165407866239548, + "epoch": 4.88, + "learning_rate": 1.0562561307419005e-05, + "loss": 0.0193, + "step": 5134, + "task_loss": 0.020621633157134056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940429311534907, + "compression_loss": 0.0, + "distillation_loss": 0.10466399788856506, + "epoch": 4.88, + "learning_rate": 1.0553863757149985e-05, + "loss": 0.1121, + "step": 5135, + "task_loss": 0.17872130870819092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940483614802726, + "compression_loss": 0.0, + "distillation_loss": 0.0792841836810112, + "epoch": 4.88, + "learning_rate": 1.0545168831091178e-05, + "loss": 0.0759, + "step": 5136, + "task_loss": 0.04564621299505234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794053788505944, + "compression_loss": 0.0, + "distillation_loss": 0.038627780973911285, + "epoch": 4.88, + "learning_rate": 1.0536476530822071e-05, + "loss": 0.0547, + "step": 5137, + "task_loss": 0.19965079426765442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940592122315087, + "compression_loss": 0.0, + "distillation_loss": 0.030025865882635117, + "epoch": 4.88, + "learning_rate": 1.0527786857921665e-05, + "loss": 0.0306, + "step": 5138, + "task_loss": 0.03600824251770973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940646326579703, + "compression_loss": 0.0, + "distillation_loss": 0.054387472569942474, + "epoch": 4.88, + "learning_rate": 1.0519099813968475e-05, + "loss": 0.0584, + "step": 5139, + "task_loss": 0.0948576033115387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940700497863327, + "compression_loss": 0.0, + "distillation_loss": 0.04773625731468201, + "epoch": 4.88, + "learning_rate": 1.0510415400540538e-05, + "loss": 0.0494, + "step": 5140, + "task_loss": 0.06462717801332474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940754636175993, + "compression_loss": 0.0, + "distillation_loss": 0.06973198801279068, + "epoch": 4.88, + "learning_rate": 1.0501733619215432e-05, + "loss": 0.0686, + "step": 5141, + "task_loss": 0.05870828405022621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940808741527738, + "compression_loss": 0.0, + "distillation_loss": 0.08635716885328293, + "epoch": 4.88, + "learning_rate": 1.049305447157024e-05, + "loss": 0.1117, + "step": 5142, + "task_loss": 0.34026485681533813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940862813928602, + "compression_loss": 0.0, + "distillation_loss": 0.019729439169168472, + "epoch": 4.88, + "learning_rate": 1.048437795918156e-05, + "loss": 0.0181, + "step": 5143, + "task_loss": 0.0036925822496414185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940916853388619, + "compression_loss": 0.0, + "distillation_loss": 0.03241331875324249, + "epoch": 4.89, + "learning_rate": 1.0475704083625543e-05, + "loss": 0.0368, + "step": 5144, + "task_loss": 0.07607822120189667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7940970859917826, + "compression_loss": 0.0, + "distillation_loss": 0.033501721918582916, + "epoch": 4.89, + "learning_rate": 1.0467032846477818e-05, + "loss": 0.0338, + "step": 5145, + "task_loss": 0.03676898777484894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941024833526261, + "compression_loss": 0.0, + "distillation_loss": 0.04217281937599182, + "epoch": 4.89, + "learning_rate": 1.0458364249313577e-05, + "loss": 0.0449, + "step": 5146, + "task_loss": 0.06914474070072174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941078774223961, + "compression_loss": 0.0, + "distillation_loss": 0.06948964297771454, + "epoch": 4.89, + "learning_rate": 1.0449698293707494e-05, + "loss": 0.0717, + "step": 5147, + "task_loss": 0.09196187555789948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794113268202096, + "compression_loss": 0.0, + "distillation_loss": 0.16506507992744446, + "epoch": 4.89, + "learning_rate": 1.0441034981233802e-05, + "loss": 0.1604, + "step": 5148, + "task_loss": 0.11810018122196198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941186556927298, + "compression_loss": 0.0, + "distillation_loss": 0.05315421149134636, + "epoch": 4.89, + "learning_rate": 1.043237431346622e-05, + "loss": 0.0599, + "step": 5149, + "task_loss": 0.12041810899972916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794124039895301, + "compression_loss": 0.0, + "distillation_loss": 0.1707717776298523, + "epoch": 4.89, + "learning_rate": 1.0423716291977995e-05, + "loss": 0.1788, + "step": 5150, + "task_loss": 0.2513020634651184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941294208108134, + "compression_loss": 0.0, + "distillation_loss": 0.08805613964796066, + "epoch": 4.89, + "learning_rate": 1.0415060918341915e-05, + "loss": 0.0987, + "step": 5151, + "task_loss": 0.1942642778158188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941347984402707, + "compression_loss": 0.0, + "distillation_loss": 0.017222080379724503, + "epoch": 4.89, + "learning_rate": 1.0406408194130259e-05, + "loss": 0.026, + "step": 5152, + "task_loss": 0.10548414289951324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941401727846764, + "compression_loss": 0.0, + "distillation_loss": 0.15333212912082672, + "epoch": 4.89, + "learning_rate": 1.0397758120914838e-05, + "loss": 0.1523, + "step": 5153, + "task_loss": 0.1433287113904953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941455438450343, + "compression_loss": 0.0, + "distillation_loss": 0.036811452358961105, + "epoch": 4.89, + "learning_rate": 1.0389110700266965e-05, + "loss": 0.0339, + "step": 5154, + "task_loss": 0.007728196680545807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941509116223481, + "compression_loss": 0.0, + "distillation_loss": 0.02540665678679943, + "epoch": 4.9, + "learning_rate": 1.0380465933757516e-05, + "loss": 0.0312, + "step": 5155, + "task_loss": 0.08360700309276581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941562761176214, + "compression_loss": 0.0, + "distillation_loss": 0.036480970680713654, + "epoch": 4.9, + "learning_rate": 1.037182382295684e-05, + "loss": 0.0338, + "step": 5156, + "task_loss": 0.009625611826777458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794161637331858, + "compression_loss": 0.0, + "distillation_loss": 0.16506238281726837, + "epoch": 4.9, + "learning_rate": 1.0363184369434803e-05, + "loss": 0.1557, + "step": 5157, + "task_loss": 0.07142955809831619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941669952660615, + "compression_loss": 0.0, + "distillation_loss": 0.13784891366958618, + "epoch": 4.9, + "learning_rate": 1.0354547574760828e-05, + "loss": 0.1407, + "step": 5158, + "task_loss": 0.16680915653705597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941723499212356, + "compression_loss": 0.0, + "distillation_loss": 0.05448282137513161, + "epoch": 4.9, + "learning_rate": 1.034591344050382e-05, + "loss": 0.0615, + "step": 5159, + "task_loss": 0.125069722533226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941777012983839, + "compression_loss": 0.0, + "distillation_loss": 0.0830635204911232, + "epoch": 4.9, + "learning_rate": 1.0337281968232213e-05, + "loss": 0.0912, + "step": 5160, + "task_loss": 0.1648905873298645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941830493985104, + "compression_loss": 0.0, + "distillation_loss": 0.09780248999595642, + "epoch": 4.9, + "learning_rate": 1.032865315951394e-05, + "loss": 0.1198, + "step": 5161, + "task_loss": 0.3177647292613983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941883942226183, + "compression_loss": 0.0, + "distillation_loss": 0.030118845403194427, + "epoch": 4.9, + "learning_rate": 1.0320027015916481e-05, + "loss": 0.0312, + "step": 5162, + "task_loss": 0.04085350036621094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7941937357717117, + "compression_loss": 0.0, + "distillation_loss": 0.019481580704450607, + "epoch": 4.9, + "learning_rate": 1.0311403539006823e-05, + "loss": 0.0233, + "step": 5163, + "task_loss": 0.05726565048098564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794199074046794, + "compression_loss": 0.0, + "distillation_loss": 0.08498305082321167, + "epoch": 4.9, + "learning_rate": 1.0302782730351443e-05, + "loss": 0.0813, + "step": 5164, + "task_loss": 0.047981880605220795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794204409048869, + "compression_loss": 0.0, + "distillation_loss": 0.09883970767259598, + "epoch": 4.91, + "learning_rate": 1.0294164591516372e-05, + "loss": 0.1042, + "step": 5165, + "task_loss": 0.15241563320159912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942097407789405, + "compression_loss": 0.0, + "distillation_loss": 0.13842526078224182, + "epoch": 4.91, + "learning_rate": 1.0285549124067126e-05, + "loss": 0.1384, + "step": 5166, + "task_loss": 0.13855035603046417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794215069238012, + "compression_loss": 0.0, + "distillation_loss": 0.2038910686969757, + "epoch": 4.91, + "learning_rate": 1.0276936329568748e-05, + "loss": 0.2098, + "step": 5167, + "task_loss": 0.26268768310546875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942203944270871, + "compression_loss": 0.0, + "distillation_loss": 0.09594913572072983, + "epoch": 4.91, + "learning_rate": 1.0268326209585784e-05, + "loss": 0.0974, + "step": 5168, + "task_loss": 0.11042402684688568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942257163471699, + "compression_loss": 0.0, + "distillation_loss": 0.09930403530597687, + "epoch": 4.91, + "learning_rate": 1.0259718765682321e-05, + "loss": 0.1025, + "step": 5169, + "task_loss": 0.13175342977046967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942310349992636, + "compression_loss": 0.0, + "distillation_loss": 0.19986680150032043, + "epoch": 4.91, + "learning_rate": 1.0251113999421935e-05, + "loss": 0.1976, + "step": 5170, + "task_loss": 0.1776677966117859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942363503843721, + "compression_loss": 0.0, + "distillation_loss": 0.07311776280403137, + "epoch": 4.91, + "learning_rate": 1.0242511912367706e-05, + "loss": 0.0678, + "step": 5171, + "task_loss": 0.019507795572280884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942416625034991, + "compression_loss": 0.0, + "distillation_loss": 0.041407689452171326, + "epoch": 4.91, + "learning_rate": 1.0233912506082277e-05, + "loss": 0.0407, + "step": 5172, + "task_loss": 0.03435212001204491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942469713576482, + "compression_loss": 0.0, + "distillation_loss": 0.03155471384525299, + "epoch": 4.91, + "learning_rate": 1.0225315782127756e-05, + "loss": 0.0368, + "step": 5173, + "task_loss": 0.0840642973780632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942522769478233, + "compression_loss": 0.0, + "distillation_loss": 0.059278104454278946, + "epoch": 4.91, + "learning_rate": 1.0216721742065777e-05, + "loss": 0.0573, + "step": 5174, + "task_loss": 0.03901209309697151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942575792750277, + "compression_loss": 0.0, + "distillation_loss": 0.09744984656572342, + "epoch": 4.91, + "learning_rate": 1.0208130387457485e-05, + "loss": 0.1025, + "step": 5175, + "task_loss": 0.14773344993591309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942628783402654, + "compression_loss": 0.0, + "distillation_loss": 0.04598250985145569, + "epoch": 4.92, + "learning_rate": 1.0199541719863554e-05, + "loss": 0.0494, + "step": 5176, + "task_loss": 0.08029758185148239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79426817414454, + "compression_loss": 0.0, + "distillation_loss": 0.06423640996217728, + "epoch": 4.92, + "learning_rate": 1.0190955740844147e-05, + "loss": 0.0723, + "step": 5177, + "task_loss": 0.1453644037246704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942734666888551, + "compression_loss": 0.0, + "distillation_loss": 0.07094942778348923, + "epoch": 4.92, + "learning_rate": 1.0182372451958943e-05, + "loss": 0.076, + "step": 5178, + "task_loss": 0.12134627997875214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942787559742146, + "compression_loss": 0.0, + "distillation_loss": 0.11129570007324219, + "epoch": 4.92, + "learning_rate": 1.0173791854767155e-05, + "loss": 0.1065, + "step": 5179, + "task_loss": 0.06368857622146606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942840420016218, + "compression_loss": 0.0, + "distillation_loss": 0.04373692348599434, + "epoch": 4.92, + "learning_rate": 1.0165213950827473e-05, + "loss": 0.0422, + "step": 5180, + "task_loss": 0.028108179569244385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942893247720808, + "compression_loss": 0.0, + "distillation_loss": 0.06466871500015259, + "epoch": 4.92, + "learning_rate": 1.015663874169813e-05, + "loss": 0.0863, + "step": 5181, + "task_loss": 0.28089067339897156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794294604286595, + "compression_loss": 0.0, + "distillation_loss": 0.04316772520542145, + "epoch": 4.92, + "learning_rate": 1.0148066228936836e-05, + "loss": 0.0488, + "step": 5182, + "task_loss": 0.09936368465423584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7942998805461682, + "compression_loss": 0.0, + "distillation_loss": 0.05181555822491646, + "epoch": 4.92, + "learning_rate": 1.013949641410085e-05, + "loss": 0.0584, + "step": 5183, + "task_loss": 0.117317795753479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794305153551804, + "compression_loss": 0.0, + "distillation_loss": 0.020150721073150635, + "epoch": 4.92, + "learning_rate": 1.0130929298746909e-05, + "loss": 0.0317, + "step": 5184, + "task_loss": 0.1358027458190918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943104233045062, + "compression_loss": 0.0, + "distillation_loss": 0.06334016472101212, + "epoch": 4.92, + "learning_rate": 1.0122364884431259e-05, + "loss": 0.0633, + "step": 5185, + "task_loss": 0.062485143542289734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943156898052783, + "compression_loss": 0.0, + "distillation_loss": 0.05838319659233093, + "epoch": 4.92, + "learning_rate": 1.011380317270969e-05, + "loss": 0.0593, + "step": 5186, + "task_loss": 0.06781870126724243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943209530551243, + "compression_loss": 0.0, + "distillation_loss": 0.033147186040878296, + "epoch": 4.93, + "learning_rate": 1.0105244165137467e-05, + "loss": 0.0457, + "step": 5187, + "task_loss": 0.15893574059009552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943262130550476, + "compression_loss": 0.0, + "distillation_loss": 0.026804693043231964, + "epoch": 4.93, + "learning_rate": 1.0096687863269368e-05, + "loss": 0.0248, + "step": 5188, + "task_loss": 0.006701400503516197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943314698060518, + "compression_loss": 0.0, + "distillation_loss": 0.04605764150619507, + "epoch": 4.93, + "learning_rate": 1.0088134268659693e-05, + "loss": 0.0498, + "step": 5189, + "task_loss": 0.08354970067739487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794336723309141, + "compression_loss": 0.0, + "distillation_loss": 0.033246997743844986, + "epoch": 4.93, + "learning_rate": 1.0079583382862254e-05, + "loss": 0.0326, + "step": 5190, + "task_loss": 0.026343755424022675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943419735653184, + "compression_loss": 0.0, + "distillation_loss": 0.06967267394065857, + "epoch": 4.93, + "learning_rate": 1.0071035207430352e-05, + "loss": 0.0715, + "step": 5191, + "task_loss": 0.08810234814882278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943472205755882, + "compression_loss": 0.0, + "distillation_loss": 0.02802272140979767, + "epoch": 4.93, + "learning_rate": 1.0062489743916792e-05, + "loss": 0.0287, + "step": 5192, + "task_loss": 0.03505971282720566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943524643409535, + "compression_loss": 0.0, + "distillation_loss": 0.07618989050388336, + "epoch": 4.93, + "learning_rate": 1.005394699387392e-05, + "loss": 0.0715, + "step": 5193, + "task_loss": 0.029166080057621002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943577048624184, + "compression_loss": 0.0, + "distillation_loss": 0.033866822719573975, + "epoch": 4.93, + "learning_rate": 1.0045406958853556e-05, + "loss": 0.0316, + "step": 5194, + "task_loss": 0.010730434209108353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943629421409865, + "compression_loss": 0.0, + "distillation_loss": 0.03229956328868866, + "epoch": 4.93, + "learning_rate": 1.0036869640407031e-05, + "loss": 0.0305, + "step": 5195, + "task_loss": 0.01465969905257225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943681761776614, + "compression_loss": 0.0, + "distillation_loss": 0.040657661855220795, + "epoch": 4.93, + "learning_rate": 1.002833504008521e-05, + "loss": 0.0535, + "step": 5196, + "task_loss": 0.16867557168006897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943734069734468, + "compression_loss": 0.0, + "distillation_loss": 0.12660034000873566, + "epoch": 4.94, + "learning_rate": 1.0019803159438423e-05, + "loss": 0.1192, + "step": 5197, + "task_loss": 0.05233707278966904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943786345293464, + "compression_loss": 0.0, + "distillation_loss": 0.24157142639160156, + "epoch": 4.94, + "learning_rate": 1.0011274000016547e-05, + "loss": 0.237, + "step": 5198, + "task_loss": 0.19573716819286346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943838588463639, + "compression_loss": 0.0, + "distillation_loss": 0.10286252945661545, + "epoch": 4.94, + "learning_rate": 1.0002747563368928e-05, + "loss": 0.1052, + "step": 5199, + "task_loss": 0.12575237452983856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794389079925503, + "compression_loss": 0.0, + "distillation_loss": 0.1292041838169098, + "epoch": 4.94, + "learning_rate": 9.994223851044449e-06, + "loss": 0.1243, + "step": 5200, + "task_loss": 0.07981815189123154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943942977677673, + "compression_loss": 0.0, + "distillation_loss": 0.04530710726976395, + "epoch": 4.94, + "learning_rate": 9.985702864591477e-06, + "loss": 0.0477, + "step": 5201, + "task_loss": 0.06911186873912811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7943995123741605, + "compression_loss": 0.0, + "distillation_loss": 0.04243285953998566, + "epoch": 4.94, + "learning_rate": 9.977184605557877e-06, + "loss": 0.0385, + "step": 5202, + "task_loss": 0.003344038501381874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944047237456864, + "compression_loss": 0.0, + "distillation_loss": 0.01720603182911873, + "epoch": 4.94, + "learning_rate": 9.968669075491054e-06, + "loss": 0.0204, + "step": 5203, + "task_loss": 0.04868287593126297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944099318833486, + "compression_loss": 0.0, + "distillation_loss": 0.02283567562699318, + "epoch": 4.94, + "learning_rate": 9.960156275937887e-06, + "loss": 0.0279, + "step": 5204, + "task_loss": 0.07395943999290466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944151367881507, + "compression_loss": 0.0, + "distillation_loss": 0.012866092845797539, + "epoch": 4.94, + "learning_rate": 9.951646208444756e-06, + "loss": 0.0119, + "step": 5205, + "task_loss": 0.0030320733785629272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944203384610965, + "compression_loss": 0.0, + "distillation_loss": 0.03689184784889221, + "epoch": 4.94, + "learning_rate": 9.943138874557564e-06, + "loss": 0.0415, + "step": 5206, + "task_loss": 0.0826156958937645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944255369031896, + "compression_loss": 0.0, + "distillation_loss": 0.01845708116889, + "epoch": 4.94, + "learning_rate": 9.934634275821722e-06, + "loss": 0.0175, + "step": 5207, + "task_loss": 0.009108863770961761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944307321154338, + "compression_loss": 0.0, + "distillation_loss": 0.01570175029337406, + "epoch": 4.95, + "learning_rate": 9.926132413782116e-06, + "loss": 0.0147, + "step": 5208, + "task_loss": 0.006071802228689194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944359240988327, + "compression_loss": 0.0, + "distillation_loss": 0.013230700977146626, + "epoch": 4.95, + "learning_rate": 9.917633289983144e-06, + "loss": 0.0125, + "step": 5209, + "task_loss": 0.0057407524436712265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.79444111285439, + "compression_loss": 0.0, + "distillation_loss": 0.035949304699897766, + "epoch": 4.95, + "learning_rate": 9.909136905968736e-06, + "loss": 0.0461, + "step": 5210, + "task_loss": 0.13782669603824615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944462983831093, + "compression_loss": 0.0, + "distillation_loss": 0.03853258118033409, + "epoch": 4.95, + "learning_rate": 9.900643263282283e-06, + "loss": 0.0386, + "step": 5211, + "task_loss": 0.03873224928975105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944514806859944, + "compression_loss": 0.0, + "distillation_loss": 0.036589786410331726, + "epoch": 4.95, + "learning_rate": 9.892152363466691e-06, + "loss": 0.036, + "step": 5212, + "task_loss": 0.03055756166577339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944566597640489, + "compression_loss": 0.0, + "distillation_loss": 0.03954382613301277, + "epoch": 4.95, + "learning_rate": 9.883664208064394e-06, + "loss": 0.0415, + "step": 5213, + "task_loss": 0.05872972309589386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944618356182765, + "compression_loss": 0.0, + "distillation_loss": 0.0241532102227211, + "epoch": 4.95, + "learning_rate": 9.875178798617286e-06, + "loss": 0.0284, + "step": 5214, + "task_loss": 0.06614520400762558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794467008249681, + "compression_loss": 0.0, + "distillation_loss": 0.021967854350805283, + "epoch": 4.95, + "learning_rate": 9.866696136666798e-06, + "loss": 0.0208, + "step": 5215, + "task_loss": 0.010009793564677238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944721776592659, + "compression_loss": 0.0, + "distillation_loss": 0.02724001184105873, + "epoch": 4.95, + "learning_rate": 9.858216223753833e-06, + "loss": 0.0286, + "step": 5216, + "task_loss": 0.04124392196536064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794477343848035, + "compression_loss": 0.0, + "distillation_loss": 0.027787886559963226, + "epoch": 4.95, + "learning_rate": 9.849739061418823e-06, + "loss": 0.0329, + "step": 5217, + "task_loss": 0.07907851040363312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794482506816992, + "compression_loss": 0.0, + "distillation_loss": 0.08973988890647888, + "epoch": 4.96, + "learning_rate": 9.841264651201676e-06, + "loss": 0.1021, + "step": 5218, + "task_loss": 0.21334464848041534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944876665671404, + "compression_loss": 0.0, + "distillation_loss": 0.06084804609417915, + "epoch": 4.96, + "learning_rate": 9.832792994641801e-06, + "loss": 0.0676, + "step": 5219, + "task_loss": 0.12829428911209106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944928230994841, + "compression_loss": 0.0, + "distillation_loss": 0.01122710295021534, + "epoch": 4.96, + "learning_rate": 9.824324093278134e-06, + "loss": 0.0247, + "step": 5220, + "task_loss": 0.1459704488515854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7944979764150266, + "compression_loss": 0.0, + "distillation_loss": 0.06464344263076782, + "epoch": 4.96, + "learning_rate": 9.815857948649082e-06, + "loss": 0.0686, + "step": 5221, + "task_loss": 0.10410156100988388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945031265147717, + "compression_loss": 0.0, + "distillation_loss": 0.01737487129867077, + "epoch": 4.96, + "learning_rate": 9.807394562292551e-06, + "loss": 0.0214, + "step": 5222, + "task_loss": 0.05750443786382675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945082733997232, + "compression_loss": 0.0, + "distillation_loss": 0.04283272475004196, + "epoch": 4.96, + "learning_rate": 9.798933935745968e-06, + "loss": 0.0468, + "step": 5223, + "task_loss": 0.08252625167369843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945134170708845, + "compression_loss": 0.0, + "distillation_loss": 0.029033223167061806, + "epoch": 4.96, + "learning_rate": 9.790476070546257e-06, + "loss": 0.0299, + "step": 5224, + "task_loss": 0.03801552951335907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945185575292595, + "compression_loss": 0.0, + "distillation_loss": 0.03287568688392639, + "epoch": 4.96, + "learning_rate": 9.782020968229813e-06, + "loss": 0.038, + "step": 5225, + "task_loss": 0.08447042852640152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945236947758517, + "compression_loss": 0.0, + "distillation_loss": 0.05485154315829277, + "epoch": 4.96, + "learning_rate": 9.77356863033254e-06, + "loss": 0.0504, + "step": 5226, + "task_loss": 0.010427333414554596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794528828811665, + "compression_loss": 0.0, + "distillation_loss": 0.030403347685933113, + "epoch": 4.96, + "learning_rate": 9.765119058389872e-06, + "loss": 0.0276, + "step": 5227, + "task_loss": 0.0026769302785396576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945339596377028, + "compression_loss": 0.0, + "distillation_loss": 0.023587405681610107, + "epoch": 4.96, + "learning_rate": 9.756672253936694e-06, + "loss": 0.0219, + "step": 5228, + "task_loss": 0.007051067426800728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945390872549691, + "compression_loss": 0.0, + "distillation_loss": 0.13783934712409973, + "epoch": 4.97, + "learning_rate": 9.748228218507408e-06, + "loss": 0.1264, + "step": 5229, + "task_loss": 0.02342773601412773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945442116644673, + "compression_loss": 0.0, + "distillation_loss": 0.02738800458610058, + "epoch": 4.97, + "learning_rate": 9.739786953635924e-06, + "loss": 0.0255, + "step": 5230, + "task_loss": 0.008501600474119186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945493328672013, + "compression_loss": 0.0, + "distillation_loss": 0.1387009620666504, + "epoch": 4.97, + "learning_rate": 9.731348460855638e-06, + "loss": 0.1468, + "step": 5231, + "task_loss": 0.21995283663272858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945544508641748, + "compression_loss": 0.0, + "distillation_loss": 0.02429133839905262, + "epoch": 4.97, + "learning_rate": 9.722912741699427e-06, + "loss": 0.0272, + "step": 5232, + "task_loss": 0.05331313982605934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945595656563912, + "compression_loss": 0.0, + "distillation_loss": 0.046510256826877594, + "epoch": 4.97, + "learning_rate": 9.714479797699694e-06, + "loss": 0.0428, + "step": 5233, + "task_loss": 0.009763523936271667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945646772448545, + "compression_loss": 0.0, + "distillation_loss": 0.025538455694913864, + "epoch": 4.97, + "learning_rate": 9.706049630388329e-06, + "loss": 0.0352, + "step": 5234, + "task_loss": 0.12184809148311615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945697856305681, + "compression_loss": 0.0, + "distillation_loss": 0.11467162519693375, + "epoch": 4.97, + "learning_rate": 9.697622241296705e-06, + "loss": 0.1134, + "step": 5235, + "task_loss": 0.10236775875091553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945748908145359, + "compression_loss": 0.0, + "distillation_loss": 0.0173861812800169, + "epoch": 4.97, + "learning_rate": 9.68919763195569e-06, + "loss": 0.0161, + "step": 5236, + "task_loss": 0.004365898668766022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945799927977615, + "compression_loss": 0.0, + "distillation_loss": 0.014998473227024078, + "epoch": 4.97, + "learning_rate": 9.680775803895672e-06, + "loss": 0.0141, + "step": 5237, + "task_loss": 0.005868380889296532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945850915812486, + "compression_loss": 0.0, + "distillation_loss": 0.04273030161857605, + "epoch": 4.97, + "learning_rate": 9.672356758646506e-06, + "loss": 0.0546, + "step": 5238, + "task_loss": 0.1609753668308258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945901871660008, + "compression_loss": 0.0, + "distillation_loss": 0.03701045364141464, + "epoch": 4.98, + "learning_rate": 9.663940497737556e-06, + "loss": 0.0345, + "step": 5239, + "task_loss": 0.01145661249756813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7945952795530219, + "compression_loss": 0.0, + "distillation_loss": 0.01707928627729416, + "epoch": 4.98, + "learning_rate": 9.655527022697652e-06, + "loss": 0.0161, + "step": 5240, + "task_loss": 0.00770198181271553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946003687433155, + "compression_loss": 0.0, + "distillation_loss": 0.034336067736148834, + "epoch": 4.98, + "learning_rate": 9.647116335055187e-06, + "loss": 0.0393, + "step": 5241, + "task_loss": 0.08418752998113632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946054547378854, + "compression_loss": 0.0, + "distillation_loss": 0.026298392564058304, + "epoch": 4.98, + "learning_rate": 9.638708436337976e-06, + "loss": 0.0248, + "step": 5242, + "task_loss": 0.01096322201192379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946105375377351, + "compression_loss": 0.0, + "distillation_loss": 0.03858939930796623, + "epoch": 4.98, + "learning_rate": 9.630303328073352e-06, + "loss": 0.0371, + "step": 5243, + "task_loss": 0.02343849278986454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946156171438685, + "compression_loss": 0.0, + "distillation_loss": 0.029876116663217545, + "epoch": 4.98, + "learning_rate": 9.621901011788157e-06, + "loss": 0.0363, + "step": 5244, + "task_loss": 0.09448274224996567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794620693557289, + "compression_loss": 0.0, + "distillation_loss": 0.07018247991800308, + "epoch": 4.98, + "learning_rate": 9.613501489008705e-06, + "loss": 0.08, + "step": 5245, + "task_loss": 0.1685934066772461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946257667790007, + "compression_loss": 0.0, + "distillation_loss": 0.08023358136415482, + "epoch": 4.98, + "learning_rate": 9.605104761260809e-06, + "loss": 0.0914, + "step": 5246, + "task_loss": 0.19211246073246002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946308368100068, + "compression_loss": 0.0, + "distillation_loss": 0.020654667168855667, + "epoch": 4.98, + "learning_rate": 9.596710830069769e-06, + "loss": 0.0201, + "step": 5247, + "task_loss": 0.014910630881786346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946359036513112, + "compression_loss": 0.0, + "distillation_loss": 0.07811079919338226, + "epoch": 4.98, + "learning_rate": 9.588319696960396e-06, + "loss": 0.0898, + "step": 5248, + "task_loss": 0.19523471593856812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946409673039178, + "compression_loss": 0.0, + "distillation_loss": 0.14166250824928284, + "epoch": 4.98, + "learning_rate": 9.579931363456968e-06, + "loss": 0.1623, + "step": 5249, + "task_loss": 0.3483670949935913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946460277688299, + "compression_loss": 0.0, + "distillation_loss": 0.11959753930568695, + "epoch": 4.99, + "learning_rate": 9.571545831083267e-06, + "loss": 0.1251, + "step": 5250, + "task_loss": 0.17435845732688904 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.9013761467889908, + "eval_loss": 0.40377190709114075, + "eval_runtime": 18.2101, + "eval_samples_per_second": 47.886, + "eval_steps_per_second": 5.986, + "step": 5250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946510850470513, + "compression_loss": 0.0, + "distillation_loss": 0.09491365402936935, + "epoch": 4.99, + "learning_rate": 9.563163101362582e-06, + "loss": 0.0985, + "step": 5251, + "task_loss": 0.13050074875354767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946561391395859, + "compression_loss": 0.0, + "distillation_loss": 0.043609265238046646, + "epoch": 4.99, + "learning_rate": 9.554783175817667e-06, + "loss": 0.0419, + "step": 5252, + "task_loss": 0.0262824147939682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946611900474371, + "compression_loss": 0.0, + "distillation_loss": 0.03674563765525818, + "epoch": 4.99, + "learning_rate": 9.546406055970769e-06, + "loss": 0.0376, + "step": 5253, + "task_loss": 0.045376792550086975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946662377716087, + "compression_loss": 0.0, + "distillation_loss": 0.015710389241576195, + "epoch": 4.99, + "learning_rate": 9.538031743343628e-06, + "loss": 0.0144, + "step": 5254, + "task_loss": 0.0030107852071523666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946712823131045, + "compression_loss": 0.0, + "distillation_loss": 0.018925126641988754, + "epoch": 4.99, + "learning_rate": 9.529660239457497e-06, + "loss": 0.0175, + "step": 5255, + "task_loss": 0.005088077858090401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.794676323672928, + "compression_loss": 0.0, + "distillation_loss": 0.08420512825250626, + "epoch": 4.99, + "learning_rate": 9.521291545833086e-06, + "loss": 0.0801, + "step": 5256, + "task_loss": 0.0430089607834816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946813618520829, + "compression_loss": 0.0, + "distillation_loss": 0.019614659249782562, + "epoch": 4.99, + "learning_rate": 9.512925663990608e-06, + "loss": 0.0183, + "step": 5257, + "task_loss": 0.006246289238333702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946863968515729, + "compression_loss": 0.0, + "distillation_loss": 0.029730752110481262, + "epoch": 4.99, + "learning_rate": 9.504562595449764e-06, + "loss": 0.0362, + "step": 5258, + "task_loss": 0.0940956100821495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946914286724018, + "compression_loss": 0.0, + "distillation_loss": 0.1385657787322998, + "epoch": 4.99, + "learning_rate": 9.496202341729765e-06, + "loss": 0.1312, + "step": 5259, + "task_loss": 0.06457454711198807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7946964573155733, + "compression_loss": 0.0, + "distillation_loss": 0.02025190182030201, + "epoch": 5.0, + "learning_rate": 9.487844904349274e-06, + "loss": 0.0327, + "step": 5260, + "task_loss": 0.1445349156856537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947014827820907, + "compression_loss": 0.0, + "distillation_loss": 0.119037926197052, + "epoch": 5.0, + "learning_rate": 9.479490284826459e-06, + "loss": 0.1162, + "step": 5261, + "task_loss": 0.09018512070178986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947065050729581, + "compression_loss": 0.0, + "distillation_loss": 0.054691120982170105, + "epoch": 5.0, + "learning_rate": 9.471138484678987e-06, + "loss": 0.0638, + "step": 5262, + "task_loss": 0.14550621807575226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947115241891791, + "compression_loss": 0.0, + "distillation_loss": 0.1421840786933899, + "epoch": 5.0, + "learning_rate": 9.462789505423997e-06, + "loss": 0.1377, + "step": 5263, + "task_loss": 0.09778082370758057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947165401317573, + "compression_loss": 0.0, + "distillation_loss": 0.039873864501714706, + "epoch": 5.0, + "learning_rate": 9.454443348578115e-06, + "loss": 0.0551, + "step": 5264, + "task_loss": 0.19219006597995758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947215529016963, + "compression_loss": 0.0, + "distillation_loss": 0.021231336519122124, + "epoch": 5.0, + "learning_rate": 9.446100015657474e-06, + "loss": 0.0198, + "step": 5265, + "task_loss": 0.007257444318383932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947265625000001, + "compression_loss": 0.0, + "distillation_loss": 0.08732712268829346, + "epoch": 5.0, + "learning_rate": 9.437759508177665e-06, + "loss": 0.0898, + "step": 5266, + "task_loss": 0.11206232756376266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947315689276719, + "compression_loss": 0.0, + "distillation_loss": 0.2404271364212036, + "epoch": 5.0, + "learning_rate": 9.429421827653798e-06, + "loss": 0.2203, + "step": 5267, + "task_loss": 0.03922963887453079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947365721857158, + "compression_loss": 0.0, + "distillation_loss": 0.48191341757774353, + "epoch": 5.0, + "learning_rate": 9.421086975600438e-06, + "loss": 0.484, + "step": 5268, + "task_loss": 0.5028781294822693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947415722751353, + "compression_loss": 0.0, + "distillation_loss": 0.2334894835948944, + "epoch": 5.0, + "learning_rate": 9.412754953531663e-06, + "loss": 0.2161, + "step": 5269, + "task_loss": 0.059303127229213715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947465691969342, + "compression_loss": 0.0, + "distillation_loss": 0.17375622689723969, + "epoch": 5.0, + "learning_rate": 9.404425762961022e-06, + "loss": 0.1641, + "step": 5270, + "task_loss": 0.07717826217412949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.794751562952116, + "compression_loss": 0.0, + "distillation_loss": 0.40218544006347656, + "epoch": 5.01, + "learning_rate": 9.396099405401543e-06, + "loss": 0.3844, + "step": 5271, + "task_loss": 0.22383946180343628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947565535416845, + "compression_loss": 0.0, + "distillation_loss": 0.28495174646377563, + "epoch": 5.01, + "learning_rate": 9.387775882365763e-06, + "loss": 0.2911, + "step": 5272, + "task_loss": 0.34688353538513184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947615409666433, + "compression_loss": 0.0, + "distillation_loss": 0.2111060917377472, + "epoch": 5.01, + "learning_rate": 9.379455195365684e-06, + "loss": 0.2001, + "step": 5273, + "task_loss": 0.10126888751983643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947665252279963, + "compression_loss": 0.0, + "distillation_loss": 0.15187914669513702, + "epoch": 5.01, + "learning_rate": 9.371137345912786e-06, + "loss": 0.1592, + "step": 5274, + "task_loss": 0.225071519613266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947715063267469, + "compression_loss": 0.0, + "distillation_loss": 0.22621603310108185, + "epoch": 5.01, + "learning_rate": 9.362822335518063e-06, + "loss": 0.2191, + "step": 5275, + "task_loss": 0.15458367764949799 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947764842638989, + "compression_loss": 0.0, + "distillation_loss": 0.11143049597740173, + "epoch": 5.01, + "learning_rate": 9.35451016569198e-06, + "loss": 0.1072, + "step": 5276, + "task_loss": 0.06862667948007584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.794781459040456, + "compression_loss": 0.0, + "distillation_loss": 0.070254385471344, + "epoch": 5.01, + "learning_rate": 9.346200837944474e-06, + "loss": 0.0712, + "step": 5277, + "task_loss": 0.07974936068058014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947864306574219, + "compression_loss": 0.0, + "distillation_loss": 0.04427797347307205, + "epoch": 5.01, + "learning_rate": 9.337894353784968e-06, + "loss": 0.04, + "step": 5278, + "task_loss": 0.0014720112085342407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947913991158003, + "compression_loss": 0.0, + "distillation_loss": 0.23665665090084076, + "epoch": 5.01, + "learning_rate": 9.329590714722388e-06, + "loss": 0.2457, + "step": 5279, + "task_loss": 0.32665759325027466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7947963644165947, + "compression_loss": 0.0, + "distillation_loss": 0.18532395362854004, + "epoch": 5.01, + "learning_rate": 9.321289922265124e-06, + "loss": 0.1669, + "step": 5280, + "task_loss": 0.0015691444277763367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.794801326560809, + "compression_loss": 0.0, + "distillation_loss": 0.20857329666614532, + "epoch": 5.02, + "learning_rate": 9.312991977921051e-06, + "loss": 0.198, + "step": 5281, + "task_loss": 0.10306287556886673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948062855494469, + "compression_loss": 0.0, + "distillation_loss": 0.11233891546726227, + "epoch": 5.02, + "learning_rate": 9.304696883197542e-06, + "loss": 0.1079, + "step": 5282, + "task_loss": 0.06844587624073029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948112413835119, + "compression_loss": 0.0, + "distillation_loss": 0.15439686179161072, + "epoch": 5.02, + "learning_rate": 9.29640463960143e-06, + "loss": 0.1511, + "step": 5283, + "task_loss": 0.12120820581912994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948161940640077, + "compression_loss": 0.0, + "distillation_loss": 0.07337729632854462, + "epoch": 5.02, + "learning_rate": 9.28811524863904e-06, + "loss": 0.0865, + "step": 5284, + "task_loss": 0.2042730152606964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948211435919381, + "compression_loss": 0.0, + "distillation_loss": 0.041931264102458954, + "epoch": 5.02, + "learning_rate": 9.279828711816182e-06, + "loss": 0.0454, + "step": 5285, + "task_loss": 0.0765017569065094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948260899683067, + "compression_loss": 0.0, + "distillation_loss": 0.21245259046554565, + "epoch": 5.02, + "learning_rate": 9.271545030638156e-06, + "loss": 0.2016, + "step": 5286, + "task_loss": 0.10362794995307922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948310331941172, + "compression_loss": 0.0, + "distillation_loss": 0.06203429400920868, + "epoch": 5.02, + "learning_rate": 9.263264206609726e-06, + "loss": 0.0577, + "step": 5287, + "task_loss": 0.018689151853322983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948359732703734, + "compression_loss": 0.0, + "distillation_loss": 0.07664155215024948, + "epoch": 5.02, + "learning_rate": 9.254986241235131e-06, + "loss": 0.0782, + "step": 5288, + "task_loss": 0.09256763756275177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948409101980788, + "compression_loss": 0.0, + "distillation_loss": 0.1382894665002823, + "epoch": 5.02, + "learning_rate": 9.246711136018124e-06, + "loss": 0.1313, + "step": 5289, + "task_loss": 0.06794591248035431 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948458439782371, + "compression_loss": 0.0, + "distillation_loss": 0.18572860956192017, + "epoch": 5.02, + "learning_rate": 9.23843889246191e-06, + "loss": 0.1894, + "step": 5290, + "task_loss": 0.22227227687835693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.794850774611852, + "compression_loss": 0.0, + "distillation_loss": 0.11491254717111588, + "epoch": 5.02, + "learning_rate": 9.230169512069168e-06, + "loss": 0.1096, + "step": 5291, + "task_loss": 0.061589665710926056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948557020999274, + "compression_loss": 0.0, + "distillation_loss": 0.03495645150542259, + "epoch": 5.03, + "learning_rate": 9.221902996342084e-06, + "loss": 0.0318, + "step": 5292, + "task_loss": 0.0034442245960235596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948606264434667, + "compression_loss": 0.0, + "distillation_loss": 0.32723939418792725, + "epoch": 5.03, + "learning_rate": 9.213639346782315e-06, + "loss": 0.3206, + "step": 5293, + "task_loss": 0.261165052652359 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948655476434737, + "compression_loss": 0.0, + "distillation_loss": 0.03939376026391983, + "epoch": 5.03, + "learning_rate": 9.20537856489099e-06, + "loss": 0.0538, + "step": 5294, + "task_loss": 0.18324826657772064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948704657009521, + "compression_loss": 0.0, + "distillation_loss": 0.14817285537719727, + "epoch": 5.03, + "learning_rate": 9.197120652168706e-06, + "loss": 0.1403, + "step": 5295, + "task_loss": 0.06911684572696686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948753806169055, + "compression_loss": 0.0, + "distillation_loss": 0.0447382926940918, + "epoch": 5.03, + "learning_rate": 9.18886561011557e-06, + "loss": 0.0421, + "step": 5296, + "task_loss": 0.01793717034161091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948802923923376, + "compression_loss": 0.0, + "distillation_loss": 0.05252248793840408, + "epoch": 5.03, + "learning_rate": 9.180613440231145e-06, + "loss": 0.0492, + "step": 5297, + "task_loss": 0.019047562032938004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948852010282522, + "compression_loss": 0.0, + "distillation_loss": 0.10433128476142883, + "epoch": 5.03, + "learning_rate": 9.172364144014466e-06, + "loss": 0.1018, + "step": 5298, + "task_loss": 0.07895547151565552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948901065256528, + "compression_loss": 0.0, + "distillation_loss": 0.04161163419485092, + "epoch": 5.03, + "learning_rate": 9.164117722964077e-06, + "loss": 0.0458, + "step": 5299, + "task_loss": 0.0835949033498764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948950088855433, + "compression_loss": 0.0, + "distillation_loss": 0.1584656685590744, + "epoch": 5.03, + "learning_rate": 9.155874178577973e-06, + "loss": 0.1606, + "step": 5300, + "task_loss": 0.18005318939685822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7948999081089272, + "compression_loss": 0.0, + "distillation_loss": 0.16821911931037903, + "epoch": 5.03, + "learning_rate": 9.147633512353618e-06, + "loss": 0.1554, + "step": 5301, + "task_loss": 0.04032587260007858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949048041968082, + "compression_loss": 0.0, + "distillation_loss": 0.046078503131866455, + "epoch": 5.04, + "learning_rate": 9.139395725787981e-06, + "loss": 0.0506, + "step": 5302, + "task_loss": 0.09137886017560959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79490969715019, + "compression_loss": 0.0, + "distillation_loss": 0.1656574308872223, + "epoch": 5.04, + "learning_rate": 9.131160820377505e-06, + "loss": 0.1672, + "step": 5303, + "task_loss": 0.18076612055301666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949145869700764, + "compression_loss": 0.0, + "distillation_loss": 0.159349262714386, + "epoch": 5.04, + "learning_rate": 9.12292879761809e-06, + "loss": 0.1652, + "step": 5304, + "task_loss": 0.21749325096607208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.794919473657471, + "compression_loss": 0.0, + "distillation_loss": 0.09572987258434296, + "epoch": 5.04, + "learning_rate": 9.11469965900512e-06, + "loss": 0.1055, + "step": 5305, + "task_loss": 0.19356384873390198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949243572133774, + "compression_loss": 0.0, + "distillation_loss": 0.06085019186139107, + "epoch": 5.04, + "learning_rate": 9.106473406033464e-06, + "loss": 0.0755, + "step": 5306, + "task_loss": 0.2072937786579132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949292376387993, + "compression_loss": 0.0, + "distillation_loss": 0.09842538833618164, + "epoch": 5.04, + "learning_rate": 9.098250040197462e-06, + "loss": 0.113, + "step": 5307, + "task_loss": 0.24399572610855103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949341149347405, + "compression_loss": 0.0, + "distillation_loss": 0.1453973799943924, + "epoch": 5.04, + "learning_rate": 9.090029562990911e-06, + "loss": 0.1474, + "step": 5308, + "task_loss": 0.16563892364501953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949389891022046, + "compression_loss": 0.0, + "distillation_loss": 0.04174884408712387, + "epoch": 5.04, + "learning_rate": 9.081811975907126e-06, + "loss": 0.0491, + "step": 5309, + "task_loss": 0.11511567234992981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949438601421954, + "compression_loss": 0.0, + "distillation_loss": 0.02893233485519886, + "epoch": 5.04, + "learning_rate": 9.07359728043885e-06, + "loss": 0.0264, + "step": 5310, + "task_loss": 0.003949105739593506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949487280557164, + "compression_loss": 0.0, + "distillation_loss": 0.048204317688941956, + "epoch": 5.04, + "learning_rate": 9.065385478078337e-06, + "loss": 0.0441, + "step": 5311, + "task_loss": 0.007329225540161133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949535928437713, + "compression_loss": 0.0, + "distillation_loss": 0.043466269969940186, + "epoch": 5.04, + "learning_rate": 9.057176570317285e-06, + "loss": 0.0406, + "step": 5312, + "task_loss": 0.015056092292070389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949584545073639, + "compression_loss": 0.0, + "distillation_loss": 0.21660102903842926, + "epoch": 5.05, + "learning_rate": 9.0489705586469e-06, + "loss": 0.2129, + "step": 5313, + "task_loss": 0.17953038215637207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949633130474979, + "compression_loss": 0.0, + "distillation_loss": 0.11881037801504135, + "epoch": 5.05, + "learning_rate": 9.040767444557836e-06, + "loss": 0.1213, + "step": 5314, + "task_loss": 0.14401260018348694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949681684651768, + "compression_loss": 0.0, + "distillation_loss": 0.11176039278507233, + "epoch": 5.05, + "learning_rate": 9.032567229540212e-06, + "loss": 0.1134, + "step": 5315, + "task_loss": 0.12841668725013733 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949730207614044, + "compression_loss": 0.0, + "distillation_loss": 0.14567351341247559, + "epoch": 5.05, + "learning_rate": 9.024369915083664e-06, + "loss": 0.1399, + "step": 5316, + "task_loss": 0.0879807248711586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949778699371844, + "compression_loss": 0.0, + "distillation_loss": 0.05770561844110489, + "epoch": 5.05, + "learning_rate": 9.01617550267726e-06, + "loss": 0.0608, + "step": 5317, + "task_loss": 0.08822789788246155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949827159935204, + "compression_loss": 0.0, + "distillation_loss": 0.10660059005022049, + "epoch": 5.05, + "learning_rate": 9.007983993809543e-06, + "loss": 0.0995, + "step": 5318, + "task_loss": 0.03520110249519348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949875589314163, + "compression_loss": 0.0, + "distillation_loss": 0.06890207529067993, + "epoch": 5.05, + "learning_rate": 8.999795389968554e-06, + "loss": 0.0647, + "step": 5319, + "task_loss": 0.027203164994716644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949923987518754, + "compression_loss": 0.0, + "distillation_loss": 0.045612677931785583, + "epoch": 5.05, + "learning_rate": 8.9916096926418e-06, + "loss": 0.048, + "step": 5320, + "task_loss": 0.06988806277513504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7949972354559018, + "compression_loss": 0.0, + "distillation_loss": 0.054146356880664825, + "epoch": 5.05, + "learning_rate": 8.983426903316242e-06, + "loss": 0.0589, + "step": 5321, + "task_loss": 0.10151663422584534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950020690444989, + "compression_loss": 0.0, + "distillation_loss": 0.09348467737436295, + "epoch": 5.05, + "learning_rate": 8.975247023478314e-06, + "loss": 0.0887, + "step": 5322, + "task_loss": 0.045579515397548676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950068995186704, + "compression_loss": 0.0, + "distillation_loss": 0.04567846655845642, + "epoch": 5.06, + "learning_rate": 8.967070054613949e-06, + "loss": 0.048, + "step": 5323, + "task_loss": 0.06916110217571259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950117268794201, + "compression_loss": 0.0, + "distillation_loss": 0.03903430327773094, + "epoch": 5.06, + "learning_rate": 8.958895998208524e-06, + "loss": 0.0357, + "step": 5324, + "task_loss": 0.005971364676952362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950165511277517, + "compression_loss": 0.0, + "distillation_loss": 0.16417047381401062, + "epoch": 5.06, + "learning_rate": 8.95072485574689e-06, + "loss": 0.165, + "step": 5325, + "task_loss": 0.17254981398582458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950213722646687, + "compression_loss": 0.0, + "distillation_loss": 0.05708067864179611, + "epoch": 5.06, + "learning_rate": 8.942556628713386e-06, + "loss": 0.0848, + "step": 5326, + "task_loss": 0.33394309878349304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795026190291175, + "compression_loss": 0.0, + "distillation_loss": 0.10749347507953644, + "epoch": 5.06, + "learning_rate": 8.934391318591794e-06, + "loss": 0.1102, + "step": 5327, + "task_loss": 0.13443301618099213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950310052082741, + "compression_loss": 0.0, + "distillation_loss": 0.07780588418245316, + "epoch": 5.06, + "learning_rate": 8.926228926865404e-06, + "loss": 0.0898, + "step": 5328, + "task_loss": 0.19749955832958221 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950358170169698, + "compression_loss": 0.0, + "distillation_loss": 0.1407744139432907, + "epoch": 5.06, + "learning_rate": 8.918069455016931e-06, + "loss": 0.1353, + "step": 5329, + "task_loss": 0.08600475639104843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950406257182657, + "compression_loss": 0.0, + "distillation_loss": 0.09022311121225357, + "epoch": 5.06, + "learning_rate": 8.9099129045286e-06, + "loss": 0.0966, + "step": 5330, + "task_loss": 0.15400820970535278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950454313131655, + "compression_loss": 0.0, + "distillation_loss": 0.18497398495674133, + "epoch": 5.06, + "learning_rate": 8.901759276882082e-06, + "loss": 0.2032, + "step": 5331, + "task_loss": 0.36690419912338257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795050233802673, + "compression_loss": 0.0, + "distillation_loss": 0.04657813161611557, + "epoch": 5.06, + "learning_rate": 8.893608573558515e-06, + "loss": 0.0449, + "step": 5332, + "task_loss": 0.029313866049051285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950550331877917, + "compression_loss": 0.0, + "distillation_loss": 0.22918027639389038, + "epoch": 5.06, + "learning_rate": 8.885460796038525e-06, + "loss": 0.2344, + "step": 5333, + "task_loss": 0.2814333438873291 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950598294695255, + "compression_loss": 0.0, + "distillation_loss": 0.02877352014183998, + "epoch": 5.07, + "learning_rate": 8.877315945802191e-06, + "loss": 0.0269, + "step": 5334, + "task_loss": 0.010271269828081131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950646226488778, + "compression_loss": 0.0, + "distillation_loss": 0.039790619164705276, + "epoch": 5.07, + "learning_rate": 8.869174024329063e-06, + "loss": 0.0367, + "step": 5335, + "task_loss": 0.00851232185959816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950694127268525, + "compression_loss": 0.0, + "distillation_loss": 0.05201128497719765, + "epoch": 5.07, + "learning_rate": 8.861035033098144e-06, + "loss": 0.0603, + "step": 5336, + "task_loss": 0.13527747988700867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950741997044531, + "compression_loss": 0.0, + "distillation_loss": 0.02241450734436512, + "epoch": 5.07, + "learning_rate": 8.852898973587953e-06, + "loss": 0.0208, + "step": 5337, + "task_loss": 0.005801070481538773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950789835826836, + "compression_loss": 0.0, + "distillation_loss": 0.12234519422054291, + "epoch": 5.07, + "learning_rate": 8.844765847276432e-06, + "loss": 0.1178, + "step": 5338, + "task_loss": 0.07687139511108398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950837643625474, + "compression_loss": 0.0, + "distillation_loss": 0.042783450335264206, + "epoch": 5.07, + "learning_rate": 8.836635655640988e-06, + "loss": 0.0391, + "step": 5339, + "task_loss": 0.0057541318237781525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950885420450482, + "compression_loss": 0.0, + "distillation_loss": 0.20203274488449097, + "epoch": 5.07, + "learning_rate": 8.828508400158531e-06, + "loss": 0.1948, + "step": 5340, + "task_loss": 0.13001954555511475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950933166311898, + "compression_loss": 0.0, + "distillation_loss": 0.03790685907006264, + "epoch": 5.07, + "learning_rate": 8.820384082305403e-06, + "loss": 0.0417, + "step": 5341, + "task_loss": 0.07593842595815659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7950980881219757, + "compression_loss": 0.0, + "distillation_loss": 0.02947581559419632, + "epoch": 5.07, + "learning_rate": 8.812262703557431e-06, + "loss": 0.027, + "step": 5342, + "task_loss": 0.004847317934036255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951028565184098, + "compression_loss": 0.0, + "distillation_loss": 0.03460695222020149, + "epoch": 5.07, + "learning_rate": 8.804144265389888e-06, + "loss": 0.0316, + "step": 5343, + "task_loss": 0.004228517413139343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951076218214957, + "compression_loss": 0.0, + "distillation_loss": 0.1003609448671341, + "epoch": 5.08, + "learning_rate": 8.796028769277542e-06, + "loss": 0.0966, + "step": 5344, + "task_loss": 0.06230350583791733 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951123840322372, + "compression_loss": 0.0, + "distillation_loss": 0.06244899705052376, + "epoch": 5.08, + "learning_rate": 8.78791621669462e-06, + "loss": 0.0731, + "step": 5345, + "task_loss": 0.16876111924648285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951171431516376, + "compression_loss": 0.0, + "distillation_loss": 0.030795224010944366, + "epoch": 5.08, + "learning_rate": 8.77980660911479e-06, + "loss": 0.0394, + "step": 5346, + "task_loss": 0.11702409386634827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795121899180701, + "compression_loss": 0.0, + "distillation_loss": 0.12711569666862488, + "epoch": 5.08, + "learning_rate": 8.771699948011202e-06, + "loss": 0.1325, + "step": 5347, + "task_loss": 0.18047887086868286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951266521204309, + "compression_loss": 0.0, + "distillation_loss": 0.05044536665081978, + "epoch": 5.08, + "learning_rate": 8.763596234856482e-06, + "loss": 0.0528, + "step": 5348, + "task_loss": 0.07405995577573776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795131401971831, + "compression_loss": 0.0, + "distillation_loss": 0.08389607816934586, + "epoch": 5.08, + "learning_rate": 8.7554954711227e-06, + "loss": 0.0782, + "step": 5349, + "task_loss": 0.02667861431837082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951361487359049, + "compression_loss": 0.0, + "distillation_loss": 0.06966320425271988, + "epoch": 5.08, + "learning_rate": 8.747397658281395e-06, + "loss": 0.0719, + "step": 5350, + "task_loss": 0.09200514107942581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951408924136564, + "compression_loss": 0.0, + "distillation_loss": 0.04684102535247803, + "epoch": 5.08, + "learning_rate": 8.739302797803585e-06, + "loss": 0.0432, + "step": 5351, + "task_loss": 0.010602116584777832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951456330060892, + "compression_loss": 0.0, + "distillation_loss": 0.024965034797787666, + "epoch": 5.08, + "learning_rate": 8.731210891159736e-06, + "loss": 0.0232, + "step": 5352, + "task_loss": 0.0075826942920684814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951503705142069, + "compression_loss": 0.0, + "distillation_loss": 0.04389042407274246, + "epoch": 5.08, + "learning_rate": 8.72312193981977e-06, + "loss": 0.0407, + "step": 5353, + "task_loss": 0.011852305382490158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951551049390132, + "compression_loss": 0.0, + "distillation_loss": 0.1792694330215454, + "epoch": 5.08, + "learning_rate": 8.715035945253095e-06, + "loss": 0.1675, + "step": 5354, + "task_loss": 0.06159057095646858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951598362815118, + "compression_loss": 0.0, + "distillation_loss": 0.07054481655359268, + "epoch": 5.09, + "learning_rate": 8.706952908928576e-06, + "loss": 0.0744, + "step": 5355, + "task_loss": 0.1091315969824791 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951645645427063, + "compression_loss": 0.0, + "distillation_loss": 0.07862917333841324, + "epoch": 5.09, + "learning_rate": 8.69887283231453e-06, + "loss": 0.0713, + "step": 5356, + "task_loss": 0.005761042237281799 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951692897236006, + "compression_loss": 0.0, + "distillation_loss": 0.07857898622751236, + "epoch": 5.09, + "learning_rate": 8.690795716878733e-06, + "loss": 0.0842, + "step": 5357, + "task_loss": 0.1351117193698883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951740118251982, + "compression_loss": 0.0, + "distillation_loss": 0.025391805917024612, + "epoch": 5.09, + "learning_rate": 8.68272156408845e-06, + "loss": 0.0238, + "step": 5358, + "task_loss": 0.00902603566646576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951787308485028, + "compression_loss": 0.0, + "distillation_loss": 0.033960044384002686, + "epoch": 5.09, + "learning_rate": 8.67465037541038e-06, + "loss": 0.0314, + "step": 5359, + "task_loss": 0.008060634136199951 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795183446794518, + "compression_loss": 0.0, + "distillation_loss": 0.015219409950077534, + "epoch": 5.09, + "learning_rate": 8.666582152310681e-06, + "loss": 0.0192, + "step": 5360, + "task_loss": 0.05521482601761818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951881596642477, + "compression_loss": 0.0, + "distillation_loss": 0.04451043903827667, + "epoch": 5.09, + "learning_rate": 8.658516896255008e-06, + "loss": 0.0419, + "step": 5361, + "task_loss": 0.01849602535367012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7951928694586955, + "compression_loss": 0.0, + "distillation_loss": 0.10168831050395966, + "epoch": 5.09, + "learning_rate": 8.650454608708434e-06, + "loss": 0.1097, + "step": 5362, + "task_loss": 0.18197688460350037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795197576178865, + "compression_loss": 0.0, + "distillation_loss": 0.015219754539430141, + "epoch": 5.09, + "learning_rate": 8.642395291135527e-06, + "loss": 0.014, + "step": 5363, + "task_loss": 0.003273945301771164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952022798257599, + "compression_loss": 0.0, + "distillation_loss": 0.03735918551683426, + "epoch": 5.09, + "learning_rate": 8.634338945000287e-06, + "loss": 0.0442, + "step": 5364, + "task_loss": 0.10612202435731888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795206980400384, + "compression_loss": 0.0, + "distillation_loss": 0.0819498747587204, + "epoch": 5.09, + "learning_rate": 8.626285571766204e-06, + "loss": 0.1011, + "step": 5365, + "task_loss": 0.27321359515190125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952116779037407, + "compression_loss": 0.0, + "distillation_loss": 0.036407940089702606, + "epoch": 5.1, + "learning_rate": 8.618235172896203e-06, + "loss": 0.0459, + "step": 5366, + "task_loss": 0.13110488653182983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952163723368341, + "compression_loss": 0.0, + "distillation_loss": 0.03413936868309975, + "epoch": 5.1, + "learning_rate": 8.61018774985267e-06, + "loss": 0.0388, + "step": 5367, + "task_loss": 0.08028659969568253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952210637006675, + "compression_loss": 0.0, + "distillation_loss": 0.026723839342594147, + "epoch": 5.1, + "learning_rate": 8.602143304097477e-06, + "loss": 0.0248, + "step": 5368, + "task_loss": 0.007091682404279709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952257519962448, + "compression_loss": 0.0, + "distillation_loss": 0.07885417342185974, + "epoch": 5.1, + "learning_rate": 8.594101837091922e-06, + "loss": 0.0772, + "step": 5369, + "task_loss": 0.06278681010007858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952304372245697, + "compression_loss": 0.0, + "distillation_loss": 0.19364023208618164, + "epoch": 5.1, + "learning_rate": 8.586063350296775e-06, + "loss": 0.1925, + "step": 5370, + "task_loss": 0.18187300860881805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952351193866457, + "compression_loss": 0.0, + "distillation_loss": 0.02647271379828453, + "epoch": 5.1, + "learning_rate": 8.578027845172269e-06, + "loss": 0.0321, + "step": 5371, + "task_loss": 0.08231362700462341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952397984834766, + "compression_loss": 0.0, + "distillation_loss": 0.04869615286588669, + "epoch": 5.1, + "learning_rate": 8.569995323178102e-06, + "loss": 0.0519, + "step": 5372, + "task_loss": 0.08067583292722702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952444745160662, + "compression_loss": 0.0, + "distillation_loss": 0.017870426177978516, + "epoch": 5.1, + "learning_rate": 8.561965785773413e-06, + "loss": 0.0165, + "step": 5373, + "task_loss": 0.004511706531047821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952491474854179, + "compression_loss": 0.0, + "distillation_loss": 0.010581446811556816, + "epoch": 5.1, + "learning_rate": 8.553939234416796e-06, + "loss": 0.0097, + "step": 5374, + "task_loss": 0.0022147856652736664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952538173925355, + "compression_loss": 0.0, + "distillation_loss": 0.04116586595773697, + "epoch": 5.1, + "learning_rate": 8.545915670566331e-06, + "loss": 0.0379, + "step": 5375, + "task_loss": 0.008842799812555313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952584842384228, + "compression_loss": 0.0, + "distillation_loss": 0.2459995001554489, + "epoch": 5.11, + "learning_rate": 8.53789509567953e-06, + "loss": 0.2456, + "step": 5376, + "task_loss": 0.24173244833946228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952631480240835, + "compression_loss": 0.0, + "distillation_loss": 0.09949977695941925, + "epoch": 5.11, + "learning_rate": 8.529877511213357e-06, + "loss": 0.0938, + "step": 5377, + "task_loss": 0.04220582917332649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795267808750521, + "compression_loss": 0.0, + "distillation_loss": 0.03112136758863926, + "epoch": 5.11, + "learning_rate": 8.521862918624265e-06, + "loss": 0.0283, + "step": 5378, + "task_loss": 0.0033892393112182617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952724664187393, + "compression_loss": 0.0, + "distillation_loss": 0.08223070204257965, + "epoch": 5.11, + "learning_rate": 8.513851319368124e-06, + "loss": 0.0853, + "step": 5379, + "task_loss": 0.11259977519512177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952771210297419, + "compression_loss": 0.0, + "distillation_loss": 0.07326701283454895, + "epoch": 5.11, + "learning_rate": 8.505842714900297e-06, + "loss": 0.0902, + "step": 5380, + "task_loss": 0.24259977042675018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952817725845325, + "compression_loss": 0.0, + "distillation_loss": 0.03324619308114052, + "epoch": 5.11, + "learning_rate": 8.497837106675571e-06, + "loss": 0.0454, + "step": 5381, + "task_loss": 0.15457826852798462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952864210841147, + "compression_loss": 0.0, + "distillation_loss": 0.05445803701877594, + "epoch": 5.11, + "learning_rate": 8.489834496148217e-06, + "loss": 0.0585, + "step": 5382, + "task_loss": 0.09474760293960571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952910665294924, + "compression_loss": 0.0, + "distillation_loss": 0.05557304993271828, + "epoch": 5.11, + "learning_rate": 8.481834884771938e-06, + "loss": 0.0507, + "step": 5383, + "task_loss": 0.006748411804437637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7952957089216692, + "compression_loss": 0.0, + "distillation_loss": 0.09471587836742401, + "epoch": 5.11, + "learning_rate": 8.4738382739999e-06, + "loss": 0.094, + "step": 5384, + "task_loss": 0.08728061616420746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953003482616489, + "compression_loss": 0.0, + "distillation_loss": 0.10454539954662323, + "epoch": 5.11, + "learning_rate": 8.465844665284739e-06, + "loss": 0.1136, + "step": 5385, + "task_loss": 0.19549594819545746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953049845504347, + "compression_loss": 0.0, + "distillation_loss": 0.17727123200893402, + "epoch": 5.11, + "learning_rate": 8.45785406007852e-06, + "loss": 0.1673, + "step": 5386, + "task_loss": 0.07721205800771713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953096177890309, + "compression_loss": 0.0, + "distillation_loss": 0.02914445474743843, + "epoch": 5.12, + "learning_rate": 8.449866459832775e-06, + "loss": 0.0333, + "step": 5387, + "task_loss": 0.0706995502114296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953142479784407, + "compression_loss": 0.0, + "distillation_loss": 0.03571475297212601, + "epoch": 5.12, + "learning_rate": 8.44188186599849e-06, + "loss": 0.0336, + "step": 5388, + "task_loss": 0.014664966613054276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953188751196681, + "compression_loss": 0.0, + "distillation_loss": 0.06461668014526367, + "epoch": 5.12, + "learning_rate": 8.433900280026118e-06, + "loss": 0.0607, + "step": 5389, + "task_loss": 0.0250856913626194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953234992137167, + "compression_loss": 0.0, + "distillation_loss": 0.09197506308555603, + "epoch": 5.12, + "learning_rate": 8.425921703365547e-06, + "loss": 0.094, + "step": 5390, + "task_loss": 0.11221229285001755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953281202615902, + "compression_loss": 0.0, + "distillation_loss": 0.04114022105932236, + "epoch": 5.12, + "learning_rate": 8.417946137466107e-06, + "loss": 0.0377, + "step": 5391, + "task_loss": 0.006361830979585648 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953327382642921, + "compression_loss": 0.0, + "distillation_loss": 0.11470133811235428, + "epoch": 5.12, + "learning_rate": 8.409973583776624e-06, + "loss": 0.133, + "step": 5392, + "task_loss": 0.29758089780807495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953373532228262, + "compression_loss": 0.0, + "distillation_loss": 0.029153695330023766, + "epoch": 5.12, + "learning_rate": 8.402004043745335e-06, + "loss": 0.0271, + "step": 5393, + "task_loss": 0.008810054510831833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953419651381962, + "compression_loss": 0.0, + "distillation_loss": 0.031133677810430527, + "epoch": 5.12, + "learning_rate": 8.39403751881994e-06, + "loss": 0.0376, + "step": 5394, + "task_loss": 0.09556027501821518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953465740114058, + "compression_loss": 0.0, + "distillation_loss": 0.08608759194612503, + "epoch": 5.12, + "learning_rate": 8.386074010447614e-06, + "loss": 0.0831, + "step": 5395, + "task_loss": 0.05571907386183739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953511798434587, + "compression_loss": 0.0, + "distillation_loss": 0.028766460716724396, + "epoch": 5.12, + "learning_rate": 8.378113520074949e-06, + "loss": 0.0262, + "step": 5396, + "task_loss": 0.003098204731941223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953557826353584, + "compression_loss": 0.0, + "distillation_loss": 0.09285937994718552, + "epoch": 5.13, + "learning_rate": 8.370156049148022e-06, + "loss": 0.0864, + "step": 5397, + "task_loss": 0.027815401554107666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953603823881089, + "compression_loss": 0.0, + "distillation_loss": 0.07426916807889938, + "epoch": 5.13, + "learning_rate": 8.362201599112332e-06, + "loss": 0.0757, + "step": 5398, + "task_loss": 0.08855120837688446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953649791027136, + "compression_loss": 0.0, + "distillation_loss": 0.026602642610669136, + "epoch": 5.13, + "learning_rate": 8.354250171412859e-06, + "loss": 0.0299, + "step": 5399, + "task_loss": 0.0595041885972023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953695727801764, + "compression_loss": 0.0, + "distillation_loss": 0.10936440527439117, + "epoch": 5.13, + "learning_rate": 8.346301767494008e-06, + "loss": 0.1146, + "step": 5400, + "task_loss": 0.16161611676216125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953741634215007, + "compression_loss": 0.0, + "distillation_loss": 0.021258335560560226, + "epoch": 5.13, + "learning_rate": 8.338356388799637e-06, + "loss": 0.0326, + "step": 5401, + "task_loss": 0.1344192773103714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953787510276905, + "compression_loss": 0.0, + "distillation_loss": 0.04099886491894722, + "epoch": 5.13, + "learning_rate": 8.330414036773082e-06, + "loss": 0.0412, + "step": 5402, + "task_loss": 0.04298762232065201 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953833355997493, + "compression_loss": 0.0, + "distillation_loss": 0.07252480089664459, + "epoch": 5.13, + "learning_rate": 8.322474712857095e-06, + "loss": 0.0676, + "step": 5403, + "task_loss": 0.023213542997837067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953879171386807, + "compression_loss": 0.0, + "distillation_loss": 0.023728784173727036, + "epoch": 5.13, + "learning_rate": 8.314538418493892e-06, + "loss": 0.0218, + "step": 5404, + "task_loss": 0.004381675273180008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953924956454886, + "compression_loss": 0.0, + "distillation_loss": 0.05856727808713913, + "epoch": 5.13, + "learning_rate": 8.306605155125141e-06, + "loss": 0.0642, + "step": 5405, + "task_loss": 0.11514223366975784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7953970711211765, + "compression_loss": 0.0, + "distillation_loss": 0.022047195583581924, + "epoch": 5.13, + "learning_rate": 8.298674924191968e-06, + "loss": 0.0206, + "step": 5406, + "task_loss": 0.0073068346828222275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954016435667483, + "compression_loss": 0.0, + "distillation_loss": 0.02194095030426979, + "epoch": 5.13, + "learning_rate": 8.29074772713493e-06, + "loss": 0.0201, + "step": 5407, + "task_loss": 0.0031332336366176605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954062129832075, + "compression_loss": 0.0, + "distillation_loss": 0.03532099723815918, + "epoch": 5.14, + "learning_rate": 8.282823565394032e-06, + "loss": 0.0414, + "step": 5408, + "task_loss": 0.09565601497888565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954107793715578, + "compression_loss": 0.0, + "distillation_loss": 0.12141124904155731, + "epoch": 5.14, + "learning_rate": 8.274902440408755e-06, + "loss": 0.125, + "step": 5409, + "task_loss": 0.1574663519859314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954153427328029, + "compression_loss": 0.0, + "distillation_loss": 0.0844094529747963, + "epoch": 5.14, + "learning_rate": 8.266984353617995e-06, + "loss": 0.092, + "step": 5410, + "task_loss": 0.16067147254943848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954199030679464, + "compression_loss": 0.0, + "distillation_loss": 0.07113252580165863, + "epoch": 5.14, + "learning_rate": 8.259069306460107e-06, + "loss": 0.0768, + "step": 5411, + "task_loss": 0.12745647132396698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954244603779922, + "compression_loss": 0.0, + "distillation_loss": 0.12387774884700775, + "epoch": 5.14, + "learning_rate": 8.251157300372913e-06, + "loss": 0.1206, + "step": 5412, + "task_loss": 0.09093039482831955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954290146639438, + "compression_loss": 0.0, + "distillation_loss": 0.12096986174583435, + "epoch": 5.14, + "learning_rate": 8.243248336793658e-06, + "loss": 0.1201, + "step": 5413, + "task_loss": 0.11250494420528412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954335659268049, + "compression_loss": 0.0, + "distillation_loss": 0.09077440202236176, + "epoch": 5.14, + "learning_rate": 8.235342417159036e-06, + "loss": 0.0899, + "step": 5414, + "task_loss": 0.08181983232498169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954381141675794, + "compression_loss": 0.0, + "distillation_loss": 0.08009003102779388, + "epoch": 5.14, + "learning_rate": 8.227439542905205e-06, + "loss": 0.0828, + "step": 5415, + "task_loss": 0.10733769834041595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954426593872705, + "compression_loss": 0.0, + "distillation_loss": 0.05436462163925171, + "epoch": 5.14, + "learning_rate": 8.219539715467766e-06, + "loss": 0.0544, + "step": 5416, + "task_loss": 0.05464364215731621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954472015868824, + "compression_loss": 0.0, + "distillation_loss": 0.029182853177189827, + "epoch": 5.14, + "learning_rate": 8.211642936281752e-06, + "loss": 0.0272, + "step": 5417, + "task_loss": 0.00951925665140152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954517407674185, + "compression_loss": 0.0, + "distillation_loss": 0.027566958218812943, + "epoch": 5.15, + "learning_rate": 8.203749206781647e-06, + "loss": 0.0264, + "step": 5418, + "task_loss": 0.015928007662296295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954562769298826, + "compression_loss": 0.0, + "distillation_loss": 0.17319881916046143, + "epoch": 5.15, + "learning_rate": 8.195858528401396e-06, + "loss": 0.1678, + "step": 5419, + "task_loss": 0.11959183216094971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954608100752782, + "compression_loss": 0.0, + "distillation_loss": 0.012766760773956776, + "epoch": 5.15, + "learning_rate": 8.187970902574371e-06, + "loss": 0.0118, + "step": 5420, + "task_loss": 0.002884853631258011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954653402046092, + "compression_loss": 0.0, + "distillation_loss": 0.1071738451719284, + "epoch": 5.15, + "learning_rate": 8.180086330733403e-06, + "loss": 0.1129, + "step": 5421, + "task_loss": 0.16459250450134277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954698673188793, + "compression_loss": 0.0, + "distillation_loss": 0.06729540228843689, + "epoch": 5.15, + "learning_rate": 8.172204814310742e-06, + "loss": 0.0673, + "step": 5422, + "task_loss": 0.06722187995910645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954743914190919, + "compression_loss": 0.0, + "distillation_loss": 0.041276026517152786, + "epoch": 5.15, + "learning_rate": 8.164326354738134e-06, + "loss": 0.0452, + "step": 5423, + "task_loss": 0.0802101194858551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954789125062509, + "compression_loss": 0.0, + "distillation_loss": 0.04235687106847763, + "epoch": 5.15, + "learning_rate": 8.156450953446725e-06, + "loss": 0.0563, + "step": 5424, + "task_loss": 0.18132589757442474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954834305813601, + "compression_loss": 0.0, + "distillation_loss": 0.041152555495500565, + "epoch": 5.15, + "learning_rate": 8.148578611867114e-06, + "loss": 0.051, + "step": 5425, + "task_loss": 0.13986477255821228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954879456454228, + "compression_loss": 0.0, + "distillation_loss": 0.034636110067367554, + "epoch": 5.15, + "learning_rate": 8.14070933142936e-06, + "loss": 0.0315, + "step": 5426, + "task_loss": 0.003546604886651039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795492457699443, + "compression_loss": 0.0, + "distillation_loss": 0.040368109941482544, + "epoch": 5.15, + "learning_rate": 8.132843113562954e-06, + "loss": 0.0446, + "step": 5427, + "task_loss": 0.08240634202957153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7954969667444243, + "compression_loss": 0.0, + "distillation_loss": 0.028532054275274277, + "epoch": 5.15, + "learning_rate": 8.12497995969683e-06, + "loss": 0.0265, + "step": 5428, + "task_loss": 0.008003626018762589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955014727813703, + "compression_loss": 0.0, + "distillation_loss": 0.019893258810043335, + "epoch": 5.16, + "learning_rate": 8.11711987125936e-06, + "loss": 0.019, + "step": 5429, + "task_loss": 0.01055414229631424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955059758112848, + "compression_loss": 0.0, + "distillation_loss": 0.06559453159570694, + "epoch": 5.16, + "learning_rate": 8.109262849678378e-06, + "loss": 0.066, + "step": 5430, + "task_loss": 0.06944157928228378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955104758351714, + "compression_loss": 0.0, + "distillation_loss": 0.07780923694372177, + "epoch": 5.16, + "learning_rate": 8.101408896381141e-06, + "loss": 0.0707, + "step": 5431, + "task_loss": 0.006445575505495071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955149728540338, + "compression_loss": 0.0, + "distillation_loss": 0.049274250864982605, + "epoch": 5.16, + "learning_rate": 8.093558012794363e-06, + "loss": 0.0533, + "step": 5432, + "task_loss": 0.08973647654056549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955194668688758, + "compression_loss": 0.0, + "distillation_loss": 0.038362838327884674, + "epoch": 5.16, + "learning_rate": 8.085710200344202e-06, + "loss": 0.0464, + "step": 5433, + "task_loss": 0.1189565435051918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955239578807008, + "compression_loss": 0.0, + "distillation_loss": 0.14410632848739624, + "epoch": 5.16, + "learning_rate": 8.077865460456241e-06, + "loss": 0.1396, + "step": 5434, + "task_loss": 0.09874996542930603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955284458905129, + "compression_loss": 0.0, + "distillation_loss": 0.019637832418084145, + "epoch": 5.16, + "learning_rate": 8.070023794555521e-06, + "loss": 0.0285, + "step": 5435, + "task_loss": 0.1084957867860794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955329308993153, + "compression_loss": 0.0, + "distillation_loss": 0.022479506209492683, + "epoch": 5.16, + "learning_rate": 8.062185204066503e-06, + "loss": 0.0287, + "step": 5436, + "task_loss": 0.08433564752340317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955374129081121, + "compression_loss": 0.0, + "distillation_loss": 0.018630899488925934, + "epoch": 5.16, + "learning_rate": 8.054349690413124e-06, + "loss": 0.0186, + "step": 5437, + "task_loss": 0.018421385437250137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955418919179067, + "compression_loss": 0.0, + "distillation_loss": 0.1612919569015503, + "epoch": 5.16, + "learning_rate": 8.046517255018737e-06, + "loss": 0.1564, + "step": 5438, + "task_loss": 0.11284254491329193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955463679297029, + "compression_loss": 0.0, + "distillation_loss": 0.02024524286389351, + "epoch": 5.17, + "learning_rate": 8.03868789930613e-06, + "loss": 0.0189, + "step": 5439, + "task_loss": 0.006963614374399185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955508409445043, + "compression_loss": 0.0, + "distillation_loss": 0.08255909383296967, + "epoch": 5.17, + "learning_rate": 8.030861624697552e-06, + "loss": 0.0821, + "step": 5440, + "task_loss": 0.07832753658294678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955553109633149, + "compression_loss": 0.0, + "distillation_loss": 0.03200299292802811, + "epoch": 5.17, + "learning_rate": 8.023038432614693e-06, + "loss": 0.0304, + "step": 5441, + "task_loss": 0.01600661687552929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795559777987138, + "compression_loss": 0.0, + "distillation_loss": 0.044372882694005966, + "epoch": 5.17, + "learning_rate": 8.015218324478666e-06, + "loss": 0.0583, + "step": 5442, + "task_loss": 0.18402326107025146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955642420169773, + "compression_loss": 0.0, + "distillation_loss": 0.03316294774413109, + "epoch": 5.17, + "learning_rate": 8.007401301710022e-06, + "loss": 0.0323, + "step": 5443, + "task_loss": 0.02459472045302391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955687030538368, + "compression_loss": 0.0, + "distillation_loss": 0.04046769440174103, + "epoch": 5.17, + "learning_rate": 7.999587365728776e-06, + "loss": 0.0457, + "step": 5444, + "task_loss": 0.09298967570066452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955731610987198, + "compression_loss": 0.0, + "distillation_loss": 0.03207171708345413, + "epoch": 5.17, + "learning_rate": 7.991776517954359e-06, + "loss": 0.0461, + "step": 5445, + "task_loss": 0.17218343913555145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955776161526302, + "compression_loss": 0.0, + "distillation_loss": 0.05809757858514786, + "epoch": 5.17, + "learning_rate": 7.983968759805641e-06, + "loss": 0.0545, + "step": 5446, + "task_loss": 0.022373545914888382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955820682165717, + "compression_loss": 0.0, + "distillation_loss": 0.03011673502624035, + "epoch": 5.17, + "learning_rate": 7.976164092700958e-06, + "loss": 0.0321, + "step": 5447, + "task_loss": 0.050249215215444565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795586517291548, + "compression_loss": 0.0, + "distillation_loss": 0.08846057951450348, + "epoch": 5.17, + "learning_rate": 7.968362518058048e-06, + "loss": 0.085, + "step": 5448, + "task_loss": 0.05429108440876007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955909633785625, + "compression_loss": 0.0, + "distillation_loss": 0.2117672562599182, + "epoch": 5.17, + "learning_rate": 7.960564037294118e-06, + "loss": 0.2183, + "step": 5449, + "task_loss": 0.276851087808609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955954064786193, + "compression_loss": 0.0, + "distillation_loss": 0.06762327998876572, + "epoch": 5.18, + "learning_rate": 7.952768651825784e-06, + "loss": 0.0656, + "step": 5450, + "task_loss": 0.04782693088054657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7955998465927218, + "compression_loss": 0.0, + "distillation_loss": 0.0332944318652153, + "epoch": 5.18, + "learning_rate": 7.944976363069137e-06, + "loss": 0.0315, + "step": 5451, + "task_loss": 0.015227574855089188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956042837218736, + "compression_loss": 0.0, + "distillation_loss": 0.15575581789016724, + "epoch": 5.18, + "learning_rate": 7.937187172439669e-06, + "loss": 0.1656, + "step": 5452, + "task_loss": 0.25407275557518005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956087178670788, + "compression_loss": 0.0, + "distillation_loss": 0.04357504844665527, + "epoch": 5.18, + "learning_rate": 7.929401081352319e-06, + "loss": 0.0438, + "step": 5453, + "task_loss": 0.045449040830135345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956131490293407, + "compression_loss": 0.0, + "distillation_loss": 0.0458906814455986, + "epoch": 5.18, + "learning_rate": 7.921618091221484e-06, + "loss": 0.047, + "step": 5454, + "task_loss": 0.05692676454782486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956175772096631, + "compression_loss": 0.0, + "distillation_loss": 0.023164385929703712, + "epoch": 5.18, + "learning_rate": 7.91383820346097e-06, + "loss": 0.0319, + "step": 5455, + "task_loss": 0.1104351356625557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956220024090497, + "compression_loss": 0.0, + "distillation_loss": 0.04113226756453514, + "epoch": 5.18, + "learning_rate": 7.906061419484028e-06, + "loss": 0.0444, + "step": 5456, + "task_loss": 0.07416713237762451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956264246285041, + "compression_loss": 0.0, + "distillation_loss": 0.043819695711135864, + "epoch": 5.18, + "learning_rate": 7.898287740703356e-06, + "loss": 0.0427, + "step": 5457, + "task_loss": 0.03260737285017967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79563084386903, + "compression_loss": 0.0, + "distillation_loss": 0.028930241242051125, + "epoch": 5.18, + "learning_rate": 7.890517168531086e-06, + "loss": 0.0365, + "step": 5458, + "task_loss": 0.1042308360338211 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956352601316313, + "compression_loss": 0.0, + "distillation_loss": 0.20450790226459503, + "epoch": 5.18, + "learning_rate": 7.882749704378773e-06, + "loss": 0.1955, + "step": 5459, + "task_loss": 0.11405302584171295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956396734173115, + "compression_loss": 0.0, + "distillation_loss": 0.048243820667266846, + "epoch": 5.19, + "learning_rate": 7.874985349657405e-06, + "loss": 0.0511, + "step": 5460, + "task_loss": 0.07671748101711273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956440837270742, + "compression_loss": 0.0, + "distillation_loss": 0.012140437960624695, + "epoch": 5.19, + "learning_rate": 7.86722410577743e-06, + "loss": 0.0113, + "step": 5461, + "task_loss": 0.003828030079603195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956484910619231, + "compression_loss": 0.0, + "distillation_loss": 0.02375885099172592, + "epoch": 5.19, + "learning_rate": 7.85946597414871e-06, + "loss": 0.0221, + "step": 5462, + "task_loss": 0.006828447803854942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956528954228621, + "compression_loss": 0.0, + "distillation_loss": 0.07895895093679428, + "epoch": 5.19, + "learning_rate": 7.85171095618054e-06, + "loss": 0.0807, + "step": 5463, + "task_loss": 0.0966450572013855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956572968108947, + "compression_loss": 0.0, + "distillation_loss": 0.025494446977972984, + "epoch": 5.19, + "learning_rate": 7.843959053281663e-06, + "loss": 0.0237, + "step": 5464, + "task_loss": 0.007278071716427803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956616952270247, + "compression_loss": 0.0, + "distillation_loss": 0.020559193566441536, + "epoch": 5.19, + "learning_rate": 7.836210266860253e-06, + "loss": 0.0241, + "step": 5465, + "task_loss": 0.05645029619336128 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956660906722556, + "compression_loss": 0.0, + "distillation_loss": 0.0954541563987732, + "epoch": 5.19, + "learning_rate": 7.8284645983239e-06, + "loss": 0.0991, + "step": 5466, + "task_loss": 0.1314251720905304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956704831475913, + "compression_loss": 0.0, + "distillation_loss": 0.047063831239938736, + "epoch": 5.19, + "learning_rate": 7.820722049079653e-06, + "loss": 0.0666, + "step": 5467, + "task_loss": 0.24207653105258942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956748726540352, + "compression_loss": 0.0, + "distillation_loss": 0.04180522635579109, + "epoch": 5.19, + "learning_rate": 7.812982620533993e-06, + "loss": 0.0509, + "step": 5468, + "task_loss": 0.13276955485343933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956792591925913, + "compression_loss": 0.0, + "distillation_loss": 0.01592065393924713, + "epoch": 5.19, + "learning_rate": 7.805246314092809e-06, + "loss": 0.0236, + "step": 5469, + "task_loss": 0.09271353483200073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956836427642631, + "compression_loss": 0.0, + "distillation_loss": 0.032307371497154236, + "epoch": 5.19, + "learning_rate": 7.797513131161437e-06, + "loss": 0.0293, + "step": 5470, + "task_loss": 0.002665068954229355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956880233700542, + "compression_loss": 0.0, + "distillation_loss": 0.042782217264175415, + "epoch": 5.2, + "learning_rate": 7.78978307314466e-06, + "loss": 0.0452, + "step": 5471, + "task_loss": 0.06668760627508163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956924010109685, + "compression_loss": 0.0, + "distillation_loss": 0.02939591184258461, + "epoch": 5.2, + "learning_rate": 7.782056141446673e-06, + "loss": 0.0269, + "step": 5472, + "task_loss": 0.003960005939006805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7956967756880096, + "compression_loss": 0.0, + "distillation_loss": 0.13202489912509918, + "epoch": 5.2, + "learning_rate": 7.774332337471101e-06, + "loss": 0.136, + "step": 5473, + "task_loss": 0.17181508243083954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957011474021811, + "compression_loss": 0.0, + "distillation_loss": 0.02327493578195572, + "epoch": 5.2, + "learning_rate": 7.766611662621023e-06, + "loss": 0.0219, + "step": 5474, + "task_loss": 0.009945593774318695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957055161544868, + "compression_loss": 0.0, + "distillation_loss": 0.05180184915661812, + "epoch": 5.2, + "learning_rate": 7.75889411829894e-06, + "loss": 0.0526, + "step": 5475, + "task_loss": 0.05985014885663986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957098819459303, + "compression_loss": 0.0, + "distillation_loss": 0.09386663138866425, + "epoch": 5.2, + "learning_rate": 7.751179705906774e-06, + "loss": 0.0984, + "step": 5476, + "task_loss": 0.13911299407482147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957142447775153, + "compression_loss": 0.0, + "distillation_loss": 0.060429397970438004, + "epoch": 5.2, + "learning_rate": 7.74346842684588e-06, + "loss": 0.0566, + "step": 5477, + "task_loss": 0.022498732432723045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957186046502455, + "compression_loss": 0.0, + "distillation_loss": 0.02577110007405281, + "epoch": 5.2, + "learning_rate": 7.73576028251706e-06, + "loss": 0.0237, + "step": 5478, + "task_loss": 0.005201183259487152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957229615651246, + "compression_loss": 0.0, + "distillation_loss": 0.04601401090621948, + "epoch": 5.2, + "learning_rate": 7.728055274320527e-06, + "loss": 0.0437, + "step": 5479, + "task_loss": 0.02243119105696678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957273155231561, + "compression_loss": 0.0, + "distillation_loss": 0.11642314493656158, + "epoch": 5.2, + "learning_rate": 7.720353403655934e-06, + "loss": 0.1139, + "step": 5480, + "task_loss": 0.09131792187690735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795731666525344, + "compression_loss": 0.0, + "distillation_loss": 0.06120690703392029, + "epoch": 5.21, + "learning_rate": 7.712654671922371e-06, + "loss": 0.0697, + "step": 5481, + "task_loss": 0.14600446820259094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957360145726917, + "compression_loss": 0.0, + "distillation_loss": 0.014658894389867783, + "epoch": 5.21, + "learning_rate": 7.704959080518343e-06, + "loss": 0.0295, + "step": 5482, + "task_loss": 0.16297921538352966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957403596662032, + "compression_loss": 0.0, + "distillation_loss": 0.05037935450673103, + "epoch": 5.21, + "learning_rate": 7.697266630841784e-06, + "loss": 0.0564, + "step": 5483, + "task_loss": 0.1106264591217041 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957447018068818, + "compression_loss": 0.0, + "distillation_loss": 0.02987675368785858, + "epoch": 5.21, + "learning_rate": 7.689577324290073e-06, + "loss": 0.0273, + "step": 5484, + "task_loss": 0.004036994650959969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957490409957314, + "compression_loss": 0.0, + "distillation_loss": 0.06058541685342789, + "epoch": 5.21, + "learning_rate": 7.681891162260015e-06, + "loss": 0.061, + "step": 5485, + "task_loss": 0.06461737304925919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957533772337557, + "compression_loss": 0.0, + "distillation_loss": 0.024111486971378326, + "epoch": 5.21, + "learning_rate": 7.674208146147834e-06, + "loss": 0.0267, + "step": 5486, + "task_loss": 0.04959327355027199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957577105219583, + "compression_loss": 0.0, + "distillation_loss": 0.058419279754161835, + "epoch": 5.21, + "learning_rate": 7.666528277349177e-06, + "loss": 0.0589, + "step": 5487, + "task_loss": 0.06302666664123535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957620408613428, + "compression_loss": 0.0, + "distillation_loss": 0.04451620578765869, + "epoch": 5.21, + "learning_rate": 7.658851557259144e-06, + "loss": 0.0585, + "step": 5488, + "task_loss": 0.18458762764930725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957663682529131, + "compression_loss": 0.0, + "distillation_loss": 0.047050587832927704, + "epoch": 5.21, + "learning_rate": 7.651177987272243e-06, + "loss": 0.0492, + "step": 5489, + "task_loss": 0.06901868432760239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957706926976729, + "compression_loss": 0.0, + "distillation_loss": 0.03578869253396988, + "epoch": 5.21, + "learning_rate": 7.643507568782407e-06, + "loss": 0.033, + "step": 5490, + "task_loss": 0.007528049871325493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957750141966256, + "compression_loss": 0.0, + "distillation_loss": 0.04843487590551376, + "epoch": 5.21, + "learning_rate": 7.635840303183018e-06, + "loss": 0.0525, + "step": 5491, + "task_loss": 0.08893577009439468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957793327507751, + "compression_loss": 0.0, + "distillation_loss": 0.014655525796115398, + "epoch": 5.22, + "learning_rate": 7.628176191866853e-06, + "loss": 0.0138, + "step": 5492, + "task_loss": 0.005986766889691353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957836483611249, + "compression_loss": 0.0, + "distillation_loss": 0.036418359726667404, + "epoch": 5.22, + "learning_rate": 7.6205152362261586e-06, + "loss": 0.0488, + "step": 5493, + "task_loss": 0.1600310057401657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957879610286789, + "compression_loss": 0.0, + "distillation_loss": 0.031819455325603485, + "epoch": 5.22, + "learning_rate": 7.612857437652563e-06, + "loss": 0.0348, + "step": 5494, + "task_loss": 0.06123275309801102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957922707544407, + "compression_loss": 0.0, + "distillation_loss": 0.0890742614865303, + "epoch": 5.22, + "learning_rate": 7.605202797537156e-06, + "loss": 0.0832, + "step": 5495, + "task_loss": 0.03046075440943241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7957965775394139, + "compression_loss": 0.0, + "distillation_loss": 0.06095193326473236, + "epoch": 5.22, + "learning_rate": 7.5975513172704375e-06, + "loss": 0.0706, + "step": 5496, + "task_loss": 0.15771833062171936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958008813846024, + "compression_loss": 0.0, + "distillation_loss": 0.04849329590797424, + "epoch": 5.22, + "learning_rate": 7.589902998242327e-06, + "loss": 0.0612, + "step": 5497, + "task_loss": 0.17582949995994568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958051822910096, + "compression_loss": 0.0, + "distillation_loss": 0.08853879570960999, + "epoch": 5.22, + "learning_rate": 7.582257841842191e-06, + "loss": 0.0895, + "step": 5498, + "task_loss": 0.09831196069717407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958094802596393, + "compression_loss": 0.0, + "distillation_loss": 0.029421133920550346, + "epoch": 5.22, + "learning_rate": 7.574615849458805e-06, + "loss": 0.0488, + "step": 5499, + "task_loss": 0.22320178151130676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958137752914952, + "compression_loss": 0.0, + "distillation_loss": 0.10746181011199951, + "epoch": 5.22, + "learning_rate": 7.56697702248036e-06, + "loss": 0.1032, + "step": 5500, + "task_loss": 0.06493166834115982 + }, + { + "epoch": 5.22, + "eval_accuracy": 0.8795871559633027, + "eval_loss": 0.5020073056221008, + "eval_runtime": 18.0294, + "eval_samples_per_second": 48.365, + "eval_steps_per_second": 6.046, + "step": 5500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958180673875811, + "compression_loss": 0.0, + "distillation_loss": 0.07600829005241394, + "epoch": 5.22, + "learning_rate": 7.5593413622945e-06, + "loss": 0.0773, + "step": 5501, + "task_loss": 0.08857627213001251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958223565489004, + "compression_loss": 0.0, + "distillation_loss": 0.06297820806503296, + "epoch": 5.23, + "learning_rate": 7.551708870288282e-06, + "loss": 0.0582, + "step": 5502, + "task_loss": 0.0155994463711977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958266427764571, + "compression_loss": 0.0, + "distillation_loss": 0.045012425631284714, + "epoch": 5.23, + "learning_rate": 7.5440795478481815e-06, + "loss": 0.0409, + "step": 5503, + "task_loss": 0.0038008801639080048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958309260712546, + "compression_loss": 0.0, + "distillation_loss": 0.02455669268965721, + "epoch": 5.23, + "learning_rate": 7.536453396360091e-06, + "loss": 0.025, + "step": 5504, + "task_loss": 0.0289287306368351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958352064342967, + "compression_loss": 0.0, + "distillation_loss": 0.082015261054039, + "epoch": 5.23, + "learning_rate": 7.528830417209356e-06, + "loss": 0.0851, + "step": 5505, + "task_loss": 0.11327110230922699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958394838665872, + "compression_loss": 0.0, + "distillation_loss": 0.025041064247488976, + "epoch": 5.23, + "learning_rate": 7.521210611780716e-06, + "loss": 0.024, + "step": 5506, + "task_loss": 0.014456404373049736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958437583691296, + "compression_loss": 0.0, + "distillation_loss": 0.1386338621377945, + "epoch": 5.23, + "learning_rate": 7.513593981458347e-06, + "loss": 0.1412, + "step": 5507, + "task_loss": 0.1647292524576187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958480299429276, + "compression_loss": 0.0, + "distillation_loss": 0.11838963627815247, + "epoch": 5.23, + "learning_rate": 7.505980527625841e-06, + "loss": 0.1125, + "step": 5508, + "task_loss": 0.0594179704785347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795852298588985, + "compression_loss": 0.0, + "distillation_loss": 0.035721536725759506, + "epoch": 5.23, + "learning_rate": 7.498370251666223e-06, + "loss": 0.0433, + "step": 5509, + "task_loss": 0.1113739162683487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958565643083054, + "compression_loss": 0.0, + "distillation_loss": 0.04196429252624512, + "epoch": 5.23, + "learning_rate": 7.490763154961944e-06, + "loss": 0.0402, + "step": 5510, + "task_loss": 0.02450537495315075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958608271018925, + "compression_loss": 0.0, + "distillation_loss": 0.15733975172042847, + "epoch": 5.23, + "learning_rate": 7.483159238894858e-06, + "loss": 0.156, + "step": 5511, + "task_loss": 0.14443063735961914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79586508697075, + "compression_loss": 0.0, + "distillation_loss": 0.021348509937524796, + "epoch": 5.23, + "learning_rate": 7.475558504846264e-06, + "loss": 0.0288, + "step": 5512, + "task_loss": 0.09601381421089172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958693439158816, + "compression_loss": 0.0, + "distillation_loss": 0.03914246708154678, + "epoch": 5.24, + "learning_rate": 7.467960954196867e-06, + "loss": 0.0441, + "step": 5513, + "task_loss": 0.08865661174058914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958735979382908, + "compression_loss": 0.0, + "distillation_loss": 0.1477883756160736, + "epoch": 5.24, + "learning_rate": 7.460366588326797e-06, + "loss": 0.1582, + "step": 5514, + "task_loss": 0.25182464718818665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958778490389816, + "compression_loss": 0.0, + "distillation_loss": 0.041810937225818634, + "epoch": 5.24, + "learning_rate": 7.452775408615603e-06, + "loss": 0.0481, + "step": 5515, + "task_loss": 0.10518115013837814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958820972189574, + "compression_loss": 0.0, + "distillation_loss": 0.01617216318845749, + "epoch": 5.24, + "learning_rate": 7.44518741644227e-06, + "loss": 0.0226, + "step": 5516, + "task_loss": 0.07998733222484589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958863424792221, + "compression_loss": 0.0, + "distillation_loss": 0.05367042124271393, + "epoch": 5.24, + "learning_rate": 7.4376026131851875e-06, + "loss": 0.0558, + "step": 5517, + "task_loss": 0.07507097721099854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958905848207791, + "compression_loss": 0.0, + "distillation_loss": 0.07538196444511414, + "epoch": 5.24, + "learning_rate": 7.430021000222156e-06, + "loss": 0.0736, + "step": 5518, + "task_loss": 0.05794493108987808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958948242446324, + "compression_loss": 0.0, + "distillation_loss": 0.09253949671983719, + "epoch": 5.24, + "learning_rate": 7.422442578930444e-06, + "loss": 0.0853, + "step": 5519, + "task_loss": 0.019849814474582672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7958990607517855, + "compression_loss": 0.0, + "distillation_loss": 0.016655316576361656, + "epoch": 5.24, + "learning_rate": 7.414867350686694e-06, + "loss": 0.0227, + "step": 5520, + "task_loss": 0.07758677005767822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959032943432421, + "compression_loss": 0.0, + "distillation_loss": 0.06732715666294098, + "epoch": 5.24, + "learning_rate": 7.407295316866977e-06, + "loss": 0.0669, + "step": 5521, + "task_loss": 0.06263066828250885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795907525020006, + "compression_loss": 0.0, + "distillation_loss": 0.10630275309085846, + "epoch": 5.24, + "learning_rate": 7.399726478846788e-06, + "loss": 0.1214, + "step": 5522, + "task_loss": 0.2570263147354126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959117527830806, + "compression_loss": 0.0, + "distillation_loss": 0.05409185588359833, + "epoch": 5.25, + "learning_rate": 7.392160838001055e-06, + "loss": 0.0691, + "step": 5523, + "task_loss": 0.20388929545879364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959159776334699, + "compression_loss": 0.0, + "distillation_loss": 0.04524946212768555, + "epoch": 5.25, + "learning_rate": 7.384598395704106e-06, + "loss": 0.0479, + "step": 5524, + "task_loss": 0.0716903880238533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959201995721774, + "compression_loss": 0.0, + "distillation_loss": 0.2098562866449356, + "epoch": 5.25, + "learning_rate": 7.377039153329687e-06, + "loss": 0.2018, + "step": 5525, + "task_loss": 0.12896330654621124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959244186002068, + "compression_loss": 0.0, + "distillation_loss": 0.037853680551052094, + "epoch": 5.25, + "learning_rate": 7.36948311225098e-06, + "loss": 0.0462, + "step": 5526, + "task_loss": 0.12155872583389282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959286347185619, + "compression_loss": 0.0, + "distillation_loss": 0.018217723816633224, + "epoch": 5.25, + "learning_rate": 7.361930273840581e-06, + "loss": 0.0254, + "step": 5527, + "task_loss": 0.09013350307941437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959328479282463, + "compression_loss": 0.0, + "distillation_loss": 0.07805530726909637, + "epoch": 5.25, + "learning_rate": 7.3543806394704955e-06, + "loss": 0.0762, + "step": 5528, + "task_loss": 0.05900496616959572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959370582302636, + "compression_loss": 0.0, + "distillation_loss": 0.07341839373111725, + "epoch": 5.25, + "learning_rate": 7.346834210512138e-06, + "loss": 0.0724, + "step": 5529, + "task_loss": 0.06279435008764267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959412656256175, + "compression_loss": 0.0, + "distillation_loss": 0.02832254022359848, + "epoch": 5.25, + "learning_rate": 7.3392909883363755e-06, + "loss": 0.0378, + "step": 5530, + "task_loss": 0.12311002612113953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959454701153119, + "compression_loss": 0.0, + "distillation_loss": 0.04221939295530319, + "epoch": 5.25, + "learning_rate": 7.331750974313459e-06, + "loss": 0.0418, + "step": 5531, + "task_loss": 0.03794408589601517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959496717003502, + "compression_loss": 0.0, + "distillation_loss": 0.11199971288442612, + "epoch": 5.25, + "learning_rate": 7.324214169813062e-06, + "loss": 0.108, + "step": 5532, + "task_loss": 0.07245731353759766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959538703817363, + "compression_loss": 0.0, + "distillation_loss": 0.026987843215465546, + "epoch": 5.25, + "learning_rate": 7.316680576204296e-06, + "loss": 0.0251, + "step": 5533, + "task_loss": 0.00792054831981659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959580661604737, + "compression_loss": 0.0, + "distillation_loss": 0.049572162330150604, + "epoch": 5.26, + "learning_rate": 7.309150194855668e-06, + "loss": 0.0507, + "step": 5534, + "task_loss": 0.06096307933330536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959622590375662, + "compression_loss": 0.0, + "distillation_loss": 0.022030137479305267, + "epoch": 5.26, + "learning_rate": 7.301623027135099e-06, + "loss": 0.0206, + "step": 5535, + "task_loss": 0.008088313043117523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959664490140175, + "compression_loss": 0.0, + "distillation_loss": 0.2133478969335556, + "epoch": 5.26, + "learning_rate": 7.294099074409944e-06, + "loss": 0.2018, + "step": 5536, + "task_loss": 0.09802666306495667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959706360908312, + "compression_loss": 0.0, + "distillation_loss": 0.020365485921502113, + "epoch": 5.26, + "learning_rate": 7.28657833804697e-06, + "loss": 0.0194, + "step": 5537, + "task_loss": 0.01060546562075615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795974820269011, + "compression_loss": 0.0, + "distillation_loss": 0.04007776081562042, + "epoch": 5.26, + "learning_rate": 7.279060819412351e-06, + "loss": 0.0394, + "step": 5538, + "task_loss": 0.03316129371523857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959790015495606, + "compression_loss": 0.0, + "distillation_loss": 0.03753858804702759, + "epoch": 5.26, + "learning_rate": 7.271546519871672e-06, + "loss": 0.0461, + "step": 5539, + "task_loss": 0.12282435595989227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959831799334837, + "compression_loss": 0.0, + "distillation_loss": 0.029383216053247452, + "epoch": 5.26, + "learning_rate": 7.264035440789954e-06, + "loss": 0.0275, + "step": 5540, + "task_loss": 0.010303637012839317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959873554217839, + "compression_loss": 0.0, + "distillation_loss": 0.06451848894357681, + "epoch": 5.26, + "learning_rate": 7.25652758353162e-06, + "loss": 0.0609, + "step": 5541, + "task_loss": 0.02830340340733528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.795991528015465, + "compression_loss": 0.0, + "distillation_loss": 0.07822129875421524, + "epoch": 5.26, + "learning_rate": 7.249022949460493e-06, + "loss": 0.0823, + "step": 5542, + "task_loss": 0.1194855198264122 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959956977155306, + "compression_loss": 0.0, + "distillation_loss": 0.11556608974933624, + "epoch": 5.26, + "learning_rate": 7.2415215399398435e-06, + "loss": 0.1179, + "step": 5543, + "task_loss": 0.13891972601413727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7959998645229844, + "compression_loss": 0.0, + "distillation_loss": 0.017915979027748108, + "epoch": 5.26, + "learning_rate": 7.2340233563323284e-06, + "loss": 0.0175, + "step": 5544, + "task_loss": 0.014106318354606628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960040284388301, + "compression_loss": 0.0, + "distillation_loss": 0.05048777535557747, + "epoch": 5.27, + "learning_rate": 7.226528400000038e-06, + "loss": 0.0469, + "step": 5545, + "task_loss": 0.014579741284251213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960081894640715, + "compression_loss": 0.0, + "distillation_loss": 0.02436014637351036, + "epoch": 5.27, + "learning_rate": 7.219036672304452e-06, + "loss": 0.0269, + "step": 5546, + "task_loss": 0.04929090291261673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796012347599712, + "compression_loss": 0.0, + "distillation_loss": 0.1192215234041214, + "epoch": 5.27, + "learning_rate": 7.2115481746065e-06, + "loss": 0.1251, + "step": 5547, + "task_loss": 0.17761507630348206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960165028467554, + "compression_loss": 0.0, + "distillation_loss": 0.05981665849685669, + "epoch": 5.27, + "learning_rate": 7.20406290826649e-06, + "loss": 0.0593, + "step": 5548, + "task_loss": 0.054768696427345276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960206552062056, + "compression_loss": 0.0, + "distillation_loss": 0.07405254989862442, + "epoch": 5.27, + "learning_rate": 7.196580874644151e-06, + "loss": 0.079, + "step": 5549, + "task_loss": 0.12388560175895691 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960248046790659, + "compression_loss": 0.0, + "distillation_loss": 0.0403749905526638, + "epoch": 5.27, + "learning_rate": 7.1891020750986475e-06, + "loss": 0.0485, + "step": 5550, + "task_loss": 0.12145104259252548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960289512663403, + "compression_loss": 0.0, + "distillation_loss": 0.02248249761760235, + "epoch": 5.27, + "learning_rate": 7.181626510988529e-06, + "loss": 0.0207, + "step": 5551, + "task_loss": 0.004492869600653648 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960330949690324, + "compression_loss": 0.0, + "distillation_loss": 0.07457023113965988, + "epoch": 5.27, + "learning_rate": 7.174154183671763e-06, + "loss": 0.082, + "step": 5552, + "task_loss": 0.148670956492424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960372357881458, + "compression_loss": 0.0, + "distillation_loss": 0.06673236936330795, + "epoch": 5.27, + "learning_rate": 7.166685094505737e-06, + "loss": 0.075, + "step": 5553, + "task_loss": 0.14950726926326752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960413737246842, + "compression_loss": 0.0, + "distillation_loss": 0.036908023059368134, + "epoch": 5.27, + "learning_rate": 7.15921924484726e-06, + "loss": 0.052, + "step": 5554, + "task_loss": 0.18809108436107635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960455087796513, + "compression_loss": 0.0, + "distillation_loss": 0.021199818700551987, + "epoch": 5.28, + "learning_rate": 7.1517566360525284e-06, + "loss": 0.0293, + "step": 5555, + "task_loss": 0.10172471404075623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960496409540508, + "compression_loss": 0.0, + "distillation_loss": 0.10079994797706604, + "epoch": 5.28, + "learning_rate": 7.1442972694771545e-06, + "loss": 0.0973, + "step": 5556, + "task_loss": 0.06552006304264069 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960537702488863, + "compression_loss": 0.0, + "distillation_loss": 0.0728619322180748, + "epoch": 5.28, + "learning_rate": 7.136841146476181e-06, + "loss": 0.0749, + "step": 5557, + "task_loss": 0.09358760714530945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960578966651618, + "compression_loss": 0.0, + "distillation_loss": 0.026422590017318726, + "epoch": 5.28, + "learning_rate": 7.129388268404047e-06, + "loss": 0.0249, + "step": 5558, + "task_loss": 0.011311305686831474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960620202038805, + "compression_loss": 0.0, + "distillation_loss": 0.04695506393909454, + "epoch": 5.28, + "learning_rate": 7.121938636614589e-06, + "loss": 0.0484, + "step": 5559, + "task_loss": 0.06093965470790863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960661408660464, + "compression_loss": 0.0, + "distillation_loss": 0.02747797593474388, + "epoch": 5.28, + "learning_rate": 7.114492252461089e-06, + "loss": 0.0339, + "step": 5560, + "task_loss": 0.09208115935325623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960702586526632, + "compression_loss": 0.0, + "distillation_loss": 0.03135443478822708, + "epoch": 5.28, + "learning_rate": 7.1070491172962e-06, + "loss": 0.0301, + "step": 5561, + "task_loss": 0.01891678385436535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960743735647344, + "compression_loss": 0.0, + "distillation_loss": 0.060153115540742874, + "epoch": 5.28, + "learning_rate": 7.099609232472021e-06, + "loss": 0.0603, + "step": 5562, + "task_loss": 0.06156299635767937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960784856032638, + "compression_loss": 0.0, + "distillation_loss": 0.02115842141211033, + "epoch": 5.28, + "learning_rate": 7.092172599340024e-06, + "loss": 0.0236, + "step": 5563, + "task_loss": 0.045783985406160355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960825947692549, + "compression_loss": 0.0, + "distillation_loss": 0.016983792185783386, + "epoch": 5.28, + "learning_rate": 7.084739219251129e-06, + "loss": 0.0159, + "step": 5564, + "task_loss": 0.005871200934052467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960867010637117, + "compression_loss": 0.0, + "distillation_loss": 0.03363943099975586, + "epoch": 5.28, + "learning_rate": 7.0773090935556365e-06, + "loss": 0.0311, + "step": 5565, + "task_loss": 0.00811656005680561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960908044876377, + "compression_loss": 0.0, + "distillation_loss": 0.019596152007579803, + "epoch": 5.29, + "learning_rate": 7.0698822236032554e-06, + "loss": 0.018, + "step": 5566, + "task_loss": 0.0034526288509368896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7960949050420365, + "compression_loss": 0.0, + "distillation_loss": 0.04396222531795502, + "epoch": 5.29, + "learning_rate": 7.0624586107431276e-06, + "loss": 0.049, + "step": 5567, + "task_loss": 0.09396674484014511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796099002727912, + "compression_loss": 0.0, + "distillation_loss": 0.04795515164732933, + "epoch": 5.29, + "learning_rate": 7.055038256323782e-06, + "loss": 0.0508, + "step": 5568, + "task_loss": 0.07625043392181396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961030975462677, + "compression_loss": 0.0, + "distillation_loss": 0.03373149782419205, + "epoch": 5.29, + "learning_rate": 7.047621161693152e-06, + "loss": 0.0355, + "step": 5569, + "task_loss": 0.051260001957416534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961071894981073, + "compression_loss": 0.0, + "distillation_loss": 0.042289115488529205, + "epoch": 5.29, + "learning_rate": 7.040207328198601e-06, + "loss": 0.0427, + "step": 5570, + "task_loss": 0.04608525335788727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961112785844346, + "compression_loss": 0.0, + "distillation_loss": 0.0194284338504076, + "epoch": 5.29, + "learning_rate": 7.032796757186888e-06, + "loss": 0.0219, + "step": 5571, + "task_loss": 0.04372316598892212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961153648062532, + "compression_loss": 0.0, + "distillation_loss": 0.028270315378904343, + "epoch": 5.29, + "learning_rate": 7.025389450004177e-06, + "loss": 0.0273, + "step": 5572, + "task_loss": 0.018285321071743965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961194481645667, + "compression_loss": 0.0, + "distillation_loss": 0.11180476099252701, + "epoch": 5.29, + "learning_rate": 7.017985407996031e-06, + "loss": 0.1099, + "step": 5573, + "task_loss": 0.09264665842056274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961235286603789, + "compression_loss": 0.0, + "distillation_loss": 0.08373094350099564, + "epoch": 5.29, + "learning_rate": 7.010584632507444e-06, + "loss": 0.0796, + "step": 5574, + "task_loss": 0.04215511679649353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961276062946936, + "compression_loss": 0.0, + "distillation_loss": 0.08096636831760406, + "epoch": 5.29, + "learning_rate": 7.0031871248827985e-06, + "loss": 0.0826, + "step": 5575, + "task_loss": 0.0974457859992981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961316810685142, + "compression_loss": 0.0, + "distillation_loss": 0.12849192321300507, + "epoch": 5.3, + "learning_rate": 6.99579288646588e-06, + "loss": 0.1266, + "step": 5576, + "task_loss": 0.10947559773921967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961357529828446, + "compression_loss": 0.0, + "distillation_loss": 0.08771924674510956, + "epoch": 5.3, + "learning_rate": 6.988401918599896e-06, + "loss": 0.1008, + "step": 5577, + "task_loss": 0.2185194492340088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961398220386883, + "compression_loss": 0.0, + "distillation_loss": 0.10648892819881439, + "epoch": 5.3, + "learning_rate": 6.981014222627444e-06, + "loss": 0.0988, + "step": 5578, + "task_loss": 0.029211118817329407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961438882370491, + "compression_loss": 0.0, + "distillation_loss": 0.020999478176236153, + "epoch": 5.3, + "learning_rate": 6.973629799890544e-06, + "loss": 0.0194, + "step": 5579, + "task_loss": 0.005250850692391396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961479515789307, + "compression_loss": 0.0, + "distillation_loss": 0.04771939665079117, + "epoch": 5.3, + "learning_rate": 6.9662486517306005e-06, + "loss": 0.0527, + "step": 5580, + "task_loss": 0.0980047881603241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961520120653367, + "compression_loss": 0.0, + "distillation_loss": 0.051039569079875946, + "epoch": 5.3, + "learning_rate": 6.958870779488447e-06, + "loss": 0.0487, + "step": 5581, + "task_loss": 0.027397800236940384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961560696972708, + "compression_loss": 0.0, + "distillation_loss": 0.027529723942279816, + "epoch": 5.3, + "learning_rate": 6.951496184504306e-06, + "loss": 0.028, + "step": 5582, + "task_loss": 0.03213750571012497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961601244757368, + "compression_loss": 0.0, + "distillation_loss": 0.09078949689865112, + "epoch": 5.3, + "learning_rate": 6.944124868117796e-06, + "loss": 0.0868, + "step": 5583, + "task_loss": 0.05128881335258484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961641764017382, + "compression_loss": 0.0, + "distillation_loss": 0.021352669224143028, + "epoch": 5.3, + "learning_rate": 6.93675683166797e-06, + "loss": 0.0196, + "step": 5584, + "task_loss": 0.00414588488638401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961682254762789, + "compression_loss": 0.0, + "distillation_loss": 0.033476557582616806, + "epoch": 5.3, + "learning_rate": 6.92939207649326e-06, + "loss": 0.0385, + "step": 5585, + "task_loss": 0.08368590474128723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961722717003623, + "compression_loss": 0.0, + "distillation_loss": 0.025415003299713135, + "epoch": 5.3, + "learning_rate": 6.922030603931506e-06, + "loss": 0.0263, + "step": 5586, + "task_loss": 0.03441638872027397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961763150749924, + "compression_loss": 0.0, + "distillation_loss": 0.03962051123380661, + "epoch": 5.31, + "learning_rate": 6.914672415319945e-06, + "loss": 0.0514, + "step": 5587, + "task_loss": 0.1574546843767166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961803556011726, + "compression_loss": 0.0, + "distillation_loss": 0.029824821278452873, + "epoch": 5.31, + "learning_rate": 6.907317511995251e-06, + "loss": 0.0368, + "step": 5588, + "task_loss": 0.09920459985733032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961843932799068, + "compression_loss": 0.0, + "distillation_loss": 0.041083455085754395, + "epoch": 5.31, + "learning_rate": 6.8999658952934695e-06, + "loss": 0.0547, + "step": 5589, + "task_loss": 0.1776200234889984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961884281121985, + "compression_loss": 0.0, + "distillation_loss": 0.07152897119522095, + "epoch": 5.31, + "learning_rate": 6.892617566550044e-06, + "loss": 0.0727, + "step": 5590, + "task_loss": 0.0837155282497406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961924600990515, + "compression_loss": 0.0, + "distillation_loss": 0.015575017780065536, + "epoch": 5.31, + "learning_rate": 6.885272527099853e-06, + "loss": 0.0203, + "step": 5591, + "task_loss": 0.06283999979496002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7961964892414695, + "compression_loss": 0.0, + "distillation_loss": 0.22274234890937805, + "epoch": 5.31, + "learning_rate": 6.87793077827715e-06, + "loss": 0.2181, + "step": 5592, + "task_loss": 0.17664700746536255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796200515540456, + "compression_loss": 0.0, + "distillation_loss": 0.09952651709318161, + "epoch": 5.31, + "learning_rate": 6.8705923214155945e-06, + "loss": 0.1033, + "step": 5593, + "task_loss": 0.13760796189308167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962045389970149, + "compression_loss": 0.0, + "distillation_loss": 0.013694360852241516, + "epoch": 5.31, + "learning_rate": 6.863257157848252e-06, + "loss": 0.0129, + "step": 5594, + "task_loss": 0.006061408668756485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962085596121498, + "compression_loss": 0.0, + "distillation_loss": 0.03154522925615311, + "epoch": 5.31, + "learning_rate": 6.8559252889076e-06, + "loss": 0.0357, + "step": 5595, + "task_loss": 0.0730682909488678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962125773868644, + "compression_loss": 0.0, + "distillation_loss": 0.05232825130224228, + "epoch": 5.31, + "learning_rate": 6.848596715925493e-06, + "loss": 0.0489, + "step": 5596, + "task_loss": 0.018482720479369164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962165923221622, + "compression_loss": 0.0, + "distillation_loss": 0.020783985033631325, + "epoch": 5.32, + "learning_rate": 6.8412714402332125e-06, + "loss": 0.0192, + "step": 5597, + "task_loss": 0.0049543604254722595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962206044190472, + "compression_loss": 0.0, + "distillation_loss": 0.01990017667412758, + "epoch": 5.32, + "learning_rate": 6.833949463161438e-06, + "loss": 0.0335, + "step": 5598, + "task_loss": 0.1554040163755417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962246136785229, + "compression_loss": 0.0, + "distillation_loss": 0.027097290381789207, + "epoch": 5.32, + "learning_rate": 6.826630786040228e-06, + "loss": 0.0248, + "step": 5599, + "task_loss": 0.0043129827827215195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796228620101593, + "compression_loss": 0.0, + "distillation_loss": 0.04166903346776962, + "epoch": 5.32, + "learning_rate": 6.819315410199062e-06, + "loss": 0.0441, + "step": 5600, + "task_loss": 0.06640726327896118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962326236892612, + "compression_loss": 0.0, + "distillation_loss": 0.02872236631810665, + "epoch": 5.32, + "learning_rate": 6.812003336966802e-06, + "loss": 0.0325, + "step": 5601, + "task_loss": 0.06612066179513931 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962366244425311, + "compression_loss": 0.0, + "distillation_loss": 0.04668932408094406, + "epoch": 5.32, + "learning_rate": 6.8046945676717375e-06, + "loss": 0.0442, + "step": 5602, + "task_loss": 0.02129777893424034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962406223624064, + "compression_loss": 0.0, + "distillation_loss": 0.010439068078994751, + "epoch": 5.32, + "learning_rate": 6.7973891036415354e-06, + "loss": 0.0101, + "step": 5603, + "task_loss": 0.007036501541733742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796244617449891, + "compression_loss": 0.0, + "distillation_loss": 0.017626464366912842, + "epoch": 5.32, + "learning_rate": 6.790086946203253e-06, + "loss": 0.0233, + "step": 5604, + "task_loss": 0.07470369338989258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962486097059883, + "compression_loss": 0.0, + "distillation_loss": 0.1538507342338562, + "epoch": 5.32, + "learning_rate": 6.78278809668339e-06, + "loss": 0.152, + "step": 5605, + "task_loss": 0.13536766171455383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962525991317022, + "compression_loss": 0.0, + "distillation_loss": 0.05390554666519165, + "epoch": 5.32, + "learning_rate": 6.775492556407806e-06, + "loss": 0.054, + "step": 5606, + "task_loss": 0.05467413738369942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962565857280361, + "compression_loss": 0.0, + "distillation_loss": 0.06552339345216751, + "epoch": 5.32, + "learning_rate": 6.768200326701768e-06, + "loss": 0.0617, + "step": 5607, + "task_loss": 0.027765335515141487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962605694959941, + "compression_loss": 0.0, + "distillation_loss": 0.04036558046936989, + "epoch": 5.33, + "learning_rate": 6.760911408889939e-06, + "loss": 0.0452, + "step": 5608, + "task_loss": 0.08895239233970642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962645504365795, + "compression_loss": 0.0, + "distillation_loss": 0.14514732360839844, + "epoch": 5.33, + "learning_rate": 6.753625804296401e-06, + "loss": 0.1502, + "step": 5609, + "task_loss": 0.19537782669067383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962685285507961, + "compression_loss": 0.0, + "distillation_loss": 0.04015417769551277, + "epoch": 5.33, + "learning_rate": 6.746343514244611e-06, + "loss": 0.0367, + "step": 5610, + "task_loss": 0.006075270473957062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962725038396478, + "compression_loss": 0.0, + "distillation_loss": 0.07076828181743622, + "epoch": 5.33, + "learning_rate": 6.739064540057424e-06, + "loss": 0.0669, + "step": 5611, + "task_loss": 0.03183208778500557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962764763041379, + "compression_loss": 0.0, + "distillation_loss": 0.0222453735768795, + "epoch": 5.33, + "learning_rate": 6.731788883057116e-06, + "loss": 0.0287, + "step": 5612, + "task_loss": 0.08726339787244797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962804459452703, + "compression_loss": 0.0, + "distillation_loss": 0.03244253247976303, + "epoch": 5.33, + "learning_rate": 6.724516544565332e-06, + "loss": 0.0367, + "step": 5613, + "task_loss": 0.07509634643793106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962844127640487, + "compression_loss": 0.0, + "distillation_loss": 0.017709966748952866, + "epoch": 5.33, + "learning_rate": 6.717247525903142e-06, + "loss": 0.0164, + "step": 5614, + "task_loss": 0.004615955054759979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962883767614767, + "compression_loss": 0.0, + "distillation_loss": 0.04239179193973541, + "epoch": 5.33, + "learning_rate": 6.709981828390979e-06, + "loss": 0.0433, + "step": 5615, + "task_loss": 0.051517605781555176 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796292337938558, + "compression_loss": 0.0, + "distillation_loss": 0.0392778217792511, + "epoch": 5.33, + "learning_rate": 6.70271945334871e-06, + "loss": 0.0433, + "step": 5616, + "task_loss": 0.07987174391746521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7962962962962964, + "compression_loss": 0.0, + "distillation_loss": 0.05544783174991608, + "epoch": 5.33, + "learning_rate": 6.695460402095577e-06, + "loss": 0.0553, + "step": 5617, + "task_loss": 0.0541701577603817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963002518356953, + "compression_loss": 0.0, + "distillation_loss": 0.025989163666963577, + "epoch": 5.34, + "learning_rate": 6.688204675950205e-06, + "loss": 0.0389, + "step": 5618, + "task_loss": 0.15557032823562622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963042045577587, + "compression_loss": 0.0, + "distillation_loss": 0.029921121895313263, + "epoch": 5.34, + "learning_rate": 6.6809522762306566e-06, + "loss": 0.0329, + "step": 5619, + "task_loss": 0.05976049602031708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963081544634901, + "compression_loss": 0.0, + "distillation_loss": 0.044836804270744324, + "epoch": 5.34, + "learning_rate": 6.673703204254347e-06, + "loss": 0.0436, + "step": 5620, + "task_loss": 0.03275785595178604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963121015538933, + "compression_loss": 0.0, + "distillation_loss": 0.037071071565151215, + "epoch": 5.34, + "learning_rate": 6.666457461338108e-06, + "loss": 0.0357, + "step": 5621, + "task_loss": 0.023638306185603142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963160458299718, + "compression_loss": 0.0, + "distillation_loss": 0.0316130630671978, + "epoch": 5.34, + "learning_rate": 6.659215048798164e-06, + "loss": 0.0289, + "step": 5622, + "task_loss": 0.004098406061530113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963199872927296, + "compression_loss": 0.0, + "distillation_loss": 0.021150682121515274, + "epoch": 5.34, + "learning_rate": 6.651975967950147e-06, + "loss": 0.0196, + "step": 5623, + "task_loss": 0.005362769588828087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79632392594317, + "compression_loss": 0.0, + "distillation_loss": 0.029035231098532677, + "epoch": 5.34, + "learning_rate": 6.644740220109058e-06, + "loss": 0.0267, + "step": 5624, + "task_loss": 0.0061414651572704315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963278617822969, + "compression_loss": 0.0, + "distillation_loss": 0.06975620985031128, + "epoch": 5.34, + "learning_rate": 6.6375078065893e-06, + "loss": 0.0782, + "step": 5625, + "task_loss": 0.15371599793434143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963317948111139, + "compression_loss": 0.0, + "distillation_loss": 0.06846155226230621, + "epoch": 5.34, + "learning_rate": 6.630278728704692e-06, + "loss": 0.0738, + "step": 5626, + "task_loss": 0.12186034023761749 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963357250306248, + "compression_loss": 0.0, + "distillation_loss": 0.05929230898618698, + "epoch": 5.34, + "learning_rate": 6.6230529877684215e-06, + "loss": 0.0548, + "step": 5627, + "task_loss": 0.014448923990130424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963396524418331, + "compression_loss": 0.0, + "distillation_loss": 0.032660387456417084, + "epoch": 5.34, + "learning_rate": 6.615830585093074e-06, + "loss": 0.0351, + "step": 5628, + "task_loss": 0.05667547136545181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963435770457427, + "compression_loss": 0.0, + "distillation_loss": 0.01413150317966938, + "epoch": 5.35, + "learning_rate": 6.6086115219906485e-06, + "loss": 0.0132, + "step": 5629, + "task_loss": 0.004492944106459618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963474988433571, + "compression_loss": 0.0, + "distillation_loss": 0.11651385575532913, + "epoch": 5.35, + "learning_rate": 6.601395799772503e-06, + "loss": 0.1489, + "step": 5630, + "task_loss": 0.4406876266002655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79635141783568, + "compression_loss": 0.0, + "distillation_loss": 0.08212954550981522, + "epoch": 5.35, + "learning_rate": 6.594183419749431e-06, + "loss": 0.0788, + "step": 5631, + "task_loss": 0.048624441027641296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963553340237152, + "compression_loss": 0.0, + "distillation_loss": 0.024417495355010033, + "epoch": 5.35, + "learning_rate": 6.586974383231573e-06, + "loss": 0.0427, + "step": 5632, + "task_loss": 0.20715758204460144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963592474084663, + "compression_loss": 0.0, + "distillation_loss": 0.07218081504106522, + "epoch": 5.35, + "learning_rate": 6.579768691528504e-06, + "loss": 0.0702, + "step": 5633, + "task_loss": 0.052654679864645004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796363157990937, + "compression_loss": 0.0, + "distillation_loss": 0.04733956977725029, + "epoch": 5.35, + "learning_rate": 6.572566345949166e-06, + "loss": 0.0439, + "step": 5634, + "task_loss": 0.012745276093482971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796367065772131, + "compression_loss": 0.0, + "distillation_loss": 0.02717157080769539, + "epoch": 5.35, + "learning_rate": 6.565367347801893e-06, + "loss": 0.0268, + "step": 5635, + "task_loss": 0.023610301315784454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796370970753052, + "compression_loss": 0.0, + "distillation_loss": 0.029316646978259087, + "epoch": 5.35, + "learning_rate": 6.5581716983944274e-06, + "loss": 0.0272, + "step": 5636, + "task_loss": 0.008220043033361435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963748729347035, + "compression_loss": 0.0, + "distillation_loss": 0.12085194140672684, + "epoch": 5.35, + "learning_rate": 6.550979399033894e-06, + "loss": 0.1194, + "step": 5637, + "task_loss": 0.10640271008014679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963787723180894, + "compression_loss": 0.0, + "distillation_loss": 0.033697567880153656, + "epoch": 5.35, + "learning_rate": 6.5437904510267935e-06, + "loss": 0.0313, + "step": 5638, + "task_loss": 0.010095924139022827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963826689042134, + "compression_loss": 0.0, + "distillation_loss": 0.14860667288303375, + "epoch": 5.36, + "learning_rate": 6.536604855679043e-06, + "loss": 0.1594, + "step": 5639, + "task_loss": 0.2568969428539276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963865626940789, + "compression_loss": 0.0, + "distillation_loss": 0.030187159776687622, + "epoch": 5.36, + "learning_rate": 6.529422614295949e-06, + "loss": 0.0275, + "step": 5640, + "task_loss": 0.003732619807124138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79639045368869, + "compression_loss": 0.0, + "distillation_loss": 0.024757564067840576, + "epoch": 5.36, + "learning_rate": 6.522243728182195e-06, + "loss": 0.0257, + "step": 5641, + "task_loss": 0.03456170856952667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79639434188905, + "compression_loss": 0.0, + "distillation_loss": 0.07199036329984665, + "epoch": 5.36, + "learning_rate": 6.5150681986418466e-06, + "loss": 0.0716, + "step": 5642, + "task_loss": 0.06780923902988434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7963982272961627, + "compression_loss": 0.0, + "distillation_loss": 0.02806227095425129, + "epoch": 5.36, + "learning_rate": 6.507896026978394e-06, + "loss": 0.0354, + "step": 5643, + "task_loss": 0.1011197566986084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964021099110319, + "compression_loss": 0.0, + "distillation_loss": 0.05786515027284622, + "epoch": 5.36, + "learning_rate": 6.500727214494687e-06, + "loss": 0.0632, + "step": 5644, + "task_loss": 0.11146190017461777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964059897346611, + "compression_loss": 0.0, + "distillation_loss": 0.05632102116942406, + "epoch": 5.36, + "learning_rate": 6.493561762492966e-06, + "loss": 0.077, + "step": 5645, + "task_loss": 0.2627117931842804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964098667680543, + "compression_loss": 0.0, + "distillation_loss": 0.05039035528898239, + "epoch": 5.36, + "learning_rate": 6.48639967227489e-06, + "loss": 0.056, + "step": 5646, + "task_loss": 0.10663844645023346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964137410122147, + "compression_loss": 0.0, + "distillation_loss": 0.057316750288009644, + "epoch": 5.36, + "learning_rate": 6.4792409451414735e-06, + "loss": 0.0602, + "step": 5647, + "task_loss": 0.08620242029428482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964176124681465, + "compression_loss": 0.0, + "distillation_loss": 0.04628031328320503, + "epoch": 5.36, + "learning_rate": 6.472085582393128e-06, + "loss": 0.0488, + "step": 5648, + "task_loss": 0.07143149524927139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796421481136853, + "compression_loss": 0.0, + "distillation_loss": 0.1507255733013153, + "epoch": 5.36, + "learning_rate": 6.4649335853296685e-06, + "loss": 0.1469, + "step": 5649, + "task_loss": 0.11250782012939453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796425347019338, + "compression_loss": 0.0, + "distillation_loss": 0.07052607089281082, + "epoch": 5.37, + "learning_rate": 6.457784955250296e-06, + "loss": 0.077, + "step": 5650, + "task_loss": 0.13528487086296082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964292101166052, + "compression_loss": 0.0, + "distillation_loss": 0.012097038328647614, + "epoch": 5.37, + "learning_rate": 6.450639693453589e-06, + "loss": 0.0113, + "step": 5651, + "task_loss": 0.004289904609322548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964330704296583, + "compression_loss": 0.0, + "distillation_loss": 0.03070550039410591, + "epoch": 5.37, + "learning_rate": 6.443497801237505e-06, + "loss": 0.0397, + "step": 5652, + "task_loss": 0.12050429731607437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964369279595008, + "compression_loss": 0.0, + "distillation_loss": 0.02167482301592827, + "epoch": 5.37, + "learning_rate": 6.436359279899426e-06, + "loss": 0.0288, + "step": 5653, + "task_loss": 0.09340011328458786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964407827071367, + "compression_loss": 0.0, + "distillation_loss": 0.03893493860960007, + "epoch": 5.37, + "learning_rate": 6.429224130736084e-06, + "loss": 0.0371, + "step": 5654, + "task_loss": 0.020514240488409996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964446346735695, + "compression_loss": 0.0, + "distillation_loss": 0.06006263196468353, + "epoch": 5.37, + "learning_rate": 6.4220923550436106e-06, + "loss": 0.0578, + "step": 5655, + "task_loss": 0.0373433418571949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964484838598029, + "compression_loss": 0.0, + "distillation_loss": 0.05972398817539215, + "epoch": 5.37, + "learning_rate": 6.414963954117534e-06, + "loss": 0.0573, + "step": 5656, + "task_loss": 0.035765524953603745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964523302668406, + "compression_loss": 0.0, + "distillation_loss": 0.11887294799089432, + "epoch": 5.37, + "learning_rate": 6.407838929252768e-06, + "loss": 0.114, + "step": 5657, + "task_loss": 0.07028041779994965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964561738956862, + "compression_loss": 0.0, + "distillation_loss": 0.03886423259973526, + "epoch": 5.37, + "learning_rate": 6.400717281743601e-06, + "loss": 0.0367, + "step": 5658, + "task_loss": 0.017281973734498024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964600147473435, + "compression_loss": 0.0, + "distillation_loss": 0.023825645446777344, + "epoch": 5.37, + "learning_rate": 6.393599012883708e-06, + "loss": 0.0297, + "step": 5659, + "task_loss": 0.08249877393245697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964638528228162, + "compression_loss": 0.0, + "distillation_loss": 0.029711570590734482, + "epoch": 5.38, + "learning_rate": 6.386484123966171e-06, + "loss": 0.0276, + "step": 5660, + "task_loss": 0.00878220796585083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964676881231078, + "compression_loss": 0.0, + "distillation_loss": 0.09236393123865128, + "epoch": 5.38, + "learning_rate": 6.379372616283436e-06, + "loss": 0.1032, + "step": 5661, + "task_loss": 0.20039357244968414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964715206492221, + "compression_loss": 0.0, + "distillation_loss": 0.045773375779390335, + "epoch": 5.38, + "learning_rate": 6.372264491127336e-06, + "loss": 0.0429, + "step": 5662, + "task_loss": 0.01664689928293228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964753504021628, + "compression_loss": 0.0, + "distillation_loss": 0.058652136474847794, + "epoch": 5.38, + "learning_rate": 6.365159749789112e-06, + "loss": 0.0581, + "step": 5663, + "task_loss": 0.05268477275967598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964791773829336, + "compression_loss": 0.0, + "distillation_loss": 0.07769237458705902, + "epoch": 5.38, + "learning_rate": 6.358058393559366e-06, + "loss": 0.0717, + "step": 5664, + "task_loss": 0.017609622329473495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796483001592538, + "compression_loss": 0.0, + "distillation_loss": 0.01842435821890831, + "epoch": 5.38, + "learning_rate": 6.350960423728083e-06, + "loss": 0.024, + "step": 5665, + "task_loss": 0.07376164942979813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79648682303198, + "compression_loss": 0.0, + "distillation_loss": 0.03854244574904442, + "epoch": 5.38, + "learning_rate": 6.3438658415846565e-06, + "loss": 0.0351, + "step": 5666, + "task_loss": 0.0036998502910137177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796490641702263, + "compression_loss": 0.0, + "distillation_loss": 0.018844418227672577, + "epoch": 5.38, + "learning_rate": 6.336774648417854e-06, + "loss": 0.0237, + "step": 5667, + "task_loss": 0.06747782230377197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964944576043909, + "compression_loss": 0.0, + "distillation_loss": 0.03793220967054367, + "epoch": 5.38, + "learning_rate": 6.329686845515823e-06, + "loss": 0.0457, + "step": 5668, + "task_loss": 0.11540687829256058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7964982707393672, + "compression_loss": 0.0, + "distillation_loss": 0.06492698937654495, + "epoch": 5.38, + "learning_rate": 6.322602434166083e-06, + "loss": 0.0729, + "step": 5669, + "task_loss": 0.14457391202449799 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965020811081956, + "compression_loss": 0.0, + "distillation_loss": 0.08671444654464722, + "epoch": 5.38, + "learning_rate": 6.315521415655571e-06, + "loss": 0.0834, + "step": 5670, + "task_loss": 0.05390065908432007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965058887118799, + "compression_loss": 0.0, + "distillation_loss": 0.03182661160826683, + "epoch": 5.39, + "learning_rate": 6.308443791270579e-06, + "loss": 0.0292, + "step": 5671, + "task_loss": 0.005619386211037636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965096935514236, + "compression_loss": 0.0, + "distillation_loss": 0.019694361835718155, + "epoch": 5.39, + "learning_rate": 6.301369562296786e-06, + "loss": 0.0201, + "step": 5672, + "task_loss": 0.02362089231610298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965134956278307, + "compression_loss": 0.0, + "distillation_loss": 0.09567753225564957, + "epoch": 5.39, + "learning_rate": 6.294298730019271e-06, + "loss": 0.1003, + "step": 5673, + "task_loss": 0.14172454178333282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965172949421045, + "compression_loss": 0.0, + "distillation_loss": 0.07337604463100433, + "epoch": 5.39, + "learning_rate": 6.28723129572247e-06, + "loss": 0.0814, + "step": 5674, + "task_loss": 0.1531829535961151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796521091495249, + "compression_loss": 0.0, + "distillation_loss": 0.029669515788555145, + "epoch": 5.39, + "learning_rate": 6.280167260690237e-06, + "loss": 0.0346, + "step": 5675, + "task_loss": 0.07867026329040527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965248852882676, + "compression_loss": 0.0, + "distillation_loss": 0.02858271822333336, + "epoch": 5.39, + "learning_rate": 6.273106626205768e-06, + "loss": 0.0355, + "step": 5676, + "task_loss": 0.09754221141338348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965286763221643, + "compression_loss": 0.0, + "distillation_loss": 0.058425020426511765, + "epoch": 5.39, + "learning_rate": 6.266049393551679e-06, + "loss": 0.0646, + "step": 5677, + "task_loss": 0.1198449432849884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965324645979425, + "compression_loss": 0.0, + "distillation_loss": 0.030206482857465744, + "epoch": 5.39, + "learning_rate": 6.258995564009939e-06, + "loss": 0.0289, + "step": 5678, + "task_loss": 0.017161451280117035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796536250116606, + "compression_loss": 0.0, + "distillation_loss": 0.016308438032865524, + "epoch": 5.39, + "learning_rate": 6.251945138861915e-06, + "loss": 0.0206, + "step": 5679, + "task_loss": 0.0590689480304718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965400328791584, + "compression_loss": 0.0, + "distillation_loss": 0.06689819693565369, + "epoch": 5.39, + "learning_rate": 6.244898119388337e-06, + "loss": 0.0614, + "step": 5680, + "task_loss": 0.011653106659650803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965438128866036, + "compression_loss": 0.0, + "distillation_loss": 0.047350913286209106, + "epoch": 5.4, + "learning_rate": 6.2378545068693505e-06, + "loss": 0.0555, + "step": 5681, + "task_loss": 0.1290304809808731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796547590139945, + "compression_loss": 0.0, + "distillation_loss": 0.026393115520477295, + "epoch": 5.4, + "learning_rate": 6.230814302584445e-06, + "loss": 0.0243, + "step": 5682, + "task_loss": 0.005752213299274445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965513646401865, + "compression_loss": 0.0, + "distillation_loss": 0.023942215368151665, + "epoch": 5.4, + "learning_rate": 6.223777507812514e-06, + "loss": 0.0303, + "step": 5683, + "task_loss": 0.08767253160476685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965551363883316, + "compression_loss": 0.0, + "distillation_loss": 0.17860738933086395, + "epoch": 5.4, + "learning_rate": 6.216744123831836e-06, + "loss": 0.1826, + "step": 5684, + "task_loss": 0.21895524859428406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965589053853842, + "compression_loss": 0.0, + "distillation_loss": 0.07975465059280396, + "epoch": 5.4, + "learning_rate": 6.209714151920046e-06, + "loss": 0.0936, + "step": 5685, + "task_loss": 0.21826988458633423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965626716323478, + "compression_loss": 0.0, + "distillation_loss": 0.03170043230056763, + "epoch": 5.4, + "learning_rate": 6.2026875933541785e-06, + "loss": 0.0301, + "step": 5686, + "task_loss": 0.015506980940699577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965664351302263, + "compression_loss": 0.0, + "distillation_loss": 0.05559496954083443, + "epoch": 5.4, + "learning_rate": 6.195664449410629e-06, + "loss": 0.0656, + "step": 5687, + "task_loss": 0.15556418895721436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965701958800231, + "compression_loss": 0.0, + "distillation_loss": 0.0222992654889822, + "epoch": 5.4, + "learning_rate": 6.188644721365203e-06, + "loss": 0.0213, + "step": 5688, + "task_loss": 0.012770412489771843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796573953882742, + "compression_loss": 0.0, + "distillation_loss": 0.20726126432418823, + "epoch": 5.4, + "learning_rate": 6.181628410493059e-06, + "loss": 0.1953, + "step": 5689, + "task_loss": 0.08764246851205826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965777091393867, + "compression_loss": 0.0, + "distillation_loss": 0.05251915007829666, + "epoch": 5.4, + "learning_rate": 6.174615518068738e-06, + "loss": 0.0573, + "step": 5690, + "task_loss": 0.10022127628326416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965814616509609, + "compression_loss": 0.0, + "distillation_loss": 0.021990936249494553, + "epoch": 5.4, + "learning_rate": 6.16760604536617e-06, + "loss": 0.022, + "step": 5691, + "task_loss": 0.02201257273554802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965852114184683, + "compression_loss": 0.0, + "distillation_loss": 0.02736847475171089, + "epoch": 5.41, + "learning_rate": 6.1605999936586725e-06, + "loss": 0.0252, + "step": 5692, + "task_loss": 0.005297476425766945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965889584429124, + "compression_loss": 0.0, + "distillation_loss": 0.02012113854289055, + "epoch": 5.41, + "learning_rate": 6.153597364218916e-06, + "loss": 0.0185, + "step": 5693, + "task_loss": 0.004320105537772179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7965927027252971, + "compression_loss": 0.0, + "distillation_loss": 0.05933302640914917, + "epoch": 5.41, + "learning_rate": 6.146598158318956e-06, + "loss": 0.0612, + "step": 5694, + "task_loss": 0.07751144468784332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796596444266626, + "compression_loss": 0.0, + "distillation_loss": 0.1531362384557724, + "epoch": 5.41, + "learning_rate": 6.1396023772302465e-06, + "loss": 0.1486, + "step": 5695, + "task_loss": 0.10774153470993042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966001830679028, + "compression_loss": 0.0, + "distillation_loss": 0.032585859298706055, + "epoch": 5.41, + "learning_rate": 6.132610022223598e-06, + "loss": 0.0408, + "step": 5696, + "task_loss": 0.11497241258621216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966039191301312, + "compression_loss": 0.0, + "distillation_loss": 0.13292592763900757, + "epoch": 5.41, + "learning_rate": 6.125621094569198e-06, + "loss": 0.1305, + "step": 5697, + "task_loss": 0.10847350209951401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966076524543149, + "compression_loss": 0.0, + "distillation_loss": 0.036475587636232376, + "epoch": 5.41, + "learning_rate": 6.118635595536634e-06, + "loss": 0.0353, + "step": 5698, + "task_loss": 0.024255897849798203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966113830414574, + "compression_loss": 0.0, + "distillation_loss": 0.018289241939783096, + "epoch": 5.41, + "learning_rate": 6.111653526394839e-06, + "loss": 0.0234, + "step": 5699, + "task_loss": 0.06973688304424286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966151108925625, + "compression_loss": 0.0, + "distillation_loss": 0.11107459664344788, + "epoch": 5.41, + "learning_rate": 6.104674888412157e-06, + "loss": 0.114, + "step": 5700, + "task_loss": 0.1403195559978485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796618836008634, + "compression_loss": 0.0, + "distillation_loss": 0.03960804641246796, + "epoch": 5.41, + "learning_rate": 6.097699682856275e-06, + "loss": 0.0449, + "step": 5701, + "task_loss": 0.09255840629339218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966225583906754, + "compression_loss": 0.0, + "distillation_loss": 0.06171734258532524, + "epoch": 5.42, + "learning_rate": 6.090727910994287e-06, + "loss": 0.0701, + "step": 5702, + "task_loss": 0.1456369161605835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966262780396905, + "compression_loss": 0.0, + "distillation_loss": 0.02067572996020317, + "epoch": 5.42, + "learning_rate": 6.083759574092643e-06, + "loss": 0.019, + "step": 5703, + "task_loss": 0.00421629473567009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796629994956683, + "compression_loss": 0.0, + "distillation_loss": 0.024341976270079613, + "epoch": 5.42, + "learning_rate": 6.076794673417166e-06, + "loss": 0.0314, + "step": 5704, + "task_loss": 0.09478569775819778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966337091426565, + "compression_loss": 0.0, + "distillation_loss": 0.00914972648024559, + "epoch": 5.42, + "learning_rate": 6.069833210233078e-06, + "loss": 0.0086, + "step": 5705, + "task_loss": 0.0033750738948583603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966374205986146, + "compression_loss": 0.0, + "distillation_loss": 0.08902280032634735, + "epoch": 5.42, + "learning_rate": 6.062875185804958e-06, + "loss": 0.0931, + "step": 5706, + "task_loss": 0.13023674488067627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966411293255612, + "compression_loss": 0.0, + "distillation_loss": 0.01705138012766838, + "epoch": 5.42, + "learning_rate": 6.055920601396753e-06, + "loss": 0.025, + "step": 5707, + "task_loss": 0.09632694721221924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966448353244998, + "compression_loss": 0.0, + "distillation_loss": 0.03472287952899933, + "epoch": 5.42, + "learning_rate": 6.048969458271808e-06, + "loss": 0.0329, + "step": 5708, + "task_loss": 0.01673199236392975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966485385964343, + "compression_loss": 0.0, + "distillation_loss": 0.08574015647172928, + "epoch": 5.42, + "learning_rate": 6.0420217576928365e-06, + "loss": 0.0943, + "step": 5709, + "task_loss": 0.17116937041282654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796652239142368, + "compression_loss": 0.0, + "distillation_loss": 0.02010597288608551, + "epoch": 5.42, + "learning_rate": 6.035077500921918e-06, + "loss": 0.0189, + "step": 5710, + "task_loss": 0.008190853521227837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796655936963305, + "compression_loss": 0.0, + "distillation_loss": 0.05432071536779404, + "epoch": 5.42, + "learning_rate": 6.028136689220498e-06, + "loss": 0.0523, + "step": 5711, + "task_loss": 0.03373948112130165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966596320602487, + "compression_loss": 0.0, + "distillation_loss": 0.027371380478143692, + "epoch": 5.42, + "learning_rate": 6.021199323849424e-06, + "loss": 0.032, + "step": 5712, + "task_loss": 0.0741061344742775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796663324434203, + "compression_loss": 0.0, + "distillation_loss": 0.021952230483293533, + "epoch": 5.43, + "learning_rate": 6.014265406068897e-06, + "loss": 0.0308, + "step": 5713, + "task_loss": 0.1101214811205864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966670140861714, + "compression_loss": 0.0, + "distillation_loss": 0.04616188630461693, + "epoch": 5.43, + "learning_rate": 6.00733493713849e-06, + "loss": 0.0514, + "step": 5714, + "task_loss": 0.09814758598804474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966707010171575, + "compression_loss": 0.0, + "distillation_loss": 0.0602332204580307, + "epoch": 5.43, + "learning_rate": 6.000407918317167e-06, + "loss": 0.0716, + "step": 5715, + "task_loss": 0.17372044920921326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966743852281654, + "compression_loss": 0.0, + "distillation_loss": 0.11245335638523102, + "epoch": 5.43, + "learning_rate": 5.993484350863246e-06, + "loss": 0.1094, + "step": 5716, + "task_loss": 0.0819602832198143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966780667201983, + "compression_loss": 0.0, + "distillation_loss": 0.033800143748521805, + "epoch": 5.43, + "learning_rate": 5.986564236034426e-06, + "loss": 0.0379, + "step": 5717, + "task_loss": 0.07496166974306107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966817454942602, + "compression_loss": 0.0, + "distillation_loss": 0.03140062838792801, + "epoch": 5.43, + "learning_rate": 5.9796475750877795e-06, + "loss": 0.0294, + "step": 5718, + "task_loss": 0.011493334546685219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966854215513546, + "compression_loss": 0.0, + "distillation_loss": 0.14226366579532623, + "epoch": 5.43, + "learning_rate": 5.9727343692797615e-06, + "loss": 0.153, + "step": 5719, + "task_loss": 0.24922578036785126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966890948924853, + "compression_loss": 0.0, + "distillation_loss": 0.04390806332230568, + "epoch": 5.43, + "learning_rate": 5.965824619866184e-06, + "loss": 0.0484, + "step": 5720, + "task_loss": 0.08851758390665054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796692765518656, + "compression_loss": 0.0, + "distillation_loss": 0.05784032121300697, + "epoch": 5.43, + "learning_rate": 5.958918328102223e-06, + "loss": 0.0751, + "step": 5721, + "task_loss": 0.2306969314813614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7966964334308702, + "compression_loss": 0.0, + "distillation_loss": 0.11825370043516159, + "epoch": 5.43, + "learning_rate": 5.9520154952424606e-06, + "loss": 0.1196, + "step": 5722, + "task_loss": 0.13163554668426514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967000986301317, + "compression_loss": 0.0, + "distillation_loss": 0.03910898417234421, + "epoch": 5.43, + "learning_rate": 5.945116122540817e-06, + "loss": 0.046, + "step": 5723, + "task_loss": 0.10799519717693329 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967037611174442, + "compression_loss": 0.0, + "distillation_loss": 0.03271537274122238, + "epoch": 5.44, + "learning_rate": 5.938220211250595e-06, + "loss": 0.03, + "step": 5724, + "task_loss": 0.005857875570654869 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967074208938114, + "compression_loss": 0.0, + "distillation_loss": 0.017977934330701828, + "epoch": 5.44, + "learning_rate": 5.9313277626244725e-06, + "loss": 0.0242, + "step": 5725, + "task_loss": 0.0803464874625206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796711077960237, + "compression_loss": 0.0, + "distillation_loss": 0.0364721417427063, + "epoch": 5.44, + "learning_rate": 5.924438777914504e-06, + "loss": 0.0424, + "step": 5726, + "task_loss": 0.0960368812084198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967147323177246, + "compression_loss": 0.0, + "distillation_loss": 0.04610713943839073, + "epoch": 5.44, + "learning_rate": 5.917553258372102e-06, + "loss": 0.0538, + "step": 5727, + "task_loss": 0.12283903360366821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967183839672779, + "compression_loss": 0.0, + "distillation_loss": 0.06002458930015564, + "epoch": 5.44, + "learning_rate": 5.910671205248045e-06, + "loss": 0.0667, + "step": 5728, + "task_loss": 0.12689921259880066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967220329099006, + "compression_loss": 0.0, + "distillation_loss": 0.11154115200042725, + "epoch": 5.44, + "learning_rate": 5.903792619792506e-06, + "loss": 0.1008, + "step": 5729, + "task_loss": 0.004503896459937096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967256791465963, + "compression_loss": 0.0, + "distillation_loss": 0.03865112364292145, + "epoch": 5.44, + "learning_rate": 5.896917503255006e-06, + "loss": 0.037, + "step": 5730, + "task_loss": 0.022370828315615654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967293226783689, + "compression_loss": 0.0, + "distillation_loss": 0.02259804867208004, + "epoch": 5.44, + "learning_rate": 5.890045856884435e-06, + "loss": 0.0404, + "step": 5731, + "task_loss": 0.20083081722259521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967329635062219, + "compression_loss": 0.0, + "distillation_loss": 0.03995134308934212, + "epoch": 5.44, + "learning_rate": 5.883177681929078e-06, + "loss": 0.0678, + "step": 5732, + "task_loss": 0.31826668977737427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796736601631159, + "compression_loss": 0.0, + "distillation_loss": 0.038897112011909485, + "epoch": 5.44, + "learning_rate": 5.876312979636561e-06, + "loss": 0.0417, + "step": 5733, + "task_loss": 0.06685265898704529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967402370541838, + "compression_loss": 0.0, + "distillation_loss": 0.018638338893651962, + "epoch": 5.45, + "learning_rate": 5.869451751253885e-06, + "loss": 0.0171, + "step": 5734, + "task_loss": 0.0036374125629663467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967438697763003, + "compression_loss": 0.0, + "distillation_loss": 0.03524959087371826, + "epoch": 5.45, + "learning_rate": 5.8625939980274295e-06, + "loss": 0.0405, + "step": 5735, + "task_loss": 0.0877641811966896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967474997985118, + "compression_loss": 0.0, + "distillation_loss": 0.0975470244884491, + "epoch": 5.45, + "learning_rate": 5.855739721202952e-06, + "loss": 0.0919, + "step": 5736, + "task_loss": 0.04134809970855713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967511271218222, + "compression_loss": 0.0, + "distillation_loss": 0.047942765057086945, + "epoch": 5.45, + "learning_rate": 5.848888922025553e-06, + "loss": 0.0523, + "step": 5737, + "task_loss": 0.09145848453044891 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967547517472352, + "compression_loss": 0.0, + "distillation_loss": 0.05359815061092377, + "epoch": 5.45, + "learning_rate": 5.842041601739706e-06, + "loss": 0.0587, + "step": 5738, + "task_loss": 0.10473354160785675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967583736757543, + "compression_loss": 0.0, + "distillation_loss": 0.07147708535194397, + "epoch": 5.45, + "learning_rate": 5.835197761589275e-06, + "loss": 0.0859, + "step": 5739, + "task_loss": 0.215956911444664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967619929083833, + "compression_loss": 0.0, + "distillation_loss": 0.047319263219833374, + "epoch": 5.45, + "learning_rate": 5.828357402817469e-06, + "loss": 0.0604, + "step": 5740, + "task_loss": 0.17839795351028442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796765609446126, + "compression_loss": 0.0, + "distillation_loss": 0.12125735729932785, + "epoch": 5.45, + "learning_rate": 5.821520526666868e-06, + "loss": 0.1155, + "step": 5741, + "task_loss": 0.06390950828790665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967692232899859, + "compression_loss": 0.0, + "distillation_loss": 0.02730424515902996, + "epoch": 5.45, + "learning_rate": 5.8146871343794315e-06, + "loss": 0.0254, + "step": 5742, + "task_loss": 0.00804077833890915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967728344409667, + "compression_loss": 0.0, + "distillation_loss": 0.07533392310142517, + "epoch": 5.45, + "learning_rate": 5.80785722719647e-06, + "loss": 0.0823, + "step": 5743, + "task_loss": 0.14540576934814453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967764429000722, + "compression_loss": 0.0, + "distillation_loss": 0.1283230483531952, + "epoch": 5.45, + "learning_rate": 5.801030806358679e-06, + "loss": 0.1246, + "step": 5744, + "task_loss": 0.09111975133419037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796780048668306, + "compression_loss": 0.0, + "distillation_loss": 0.07009841501712799, + "epoch": 5.46, + "learning_rate": 5.7942078731061e-06, + "loss": 0.0717, + "step": 5745, + "task_loss": 0.08628898113965988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967836517466718, + "compression_loss": 0.0, + "distillation_loss": 0.08589686453342438, + "epoch": 5.46, + "learning_rate": 5.7873884286781615e-06, + "loss": 0.0875, + "step": 5746, + "task_loss": 0.10212301462888718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967872521361732, + "compression_loss": 0.0, + "distillation_loss": 0.032520610839128494, + "epoch": 5.46, + "learning_rate": 5.7805724743136445e-06, + "loss": 0.0341, + "step": 5747, + "task_loss": 0.04851553216576576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796790849837814, + "compression_loss": 0.0, + "distillation_loss": 0.029095636680722237, + "epoch": 5.46, + "learning_rate": 5.7737600112506925e-06, + "loss": 0.0268, + "step": 5748, + "task_loss": 0.00580623559653759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967944448525979, + "compression_loss": 0.0, + "distillation_loss": 0.13918885588645935, + "epoch": 5.46, + "learning_rate": 5.766951040726837e-06, + "loss": 0.134, + "step": 5749, + "task_loss": 0.0872417539358139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7967980371815284, + "compression_loss": 0.0, + "distillation_loss": 0.03975678235292435, + "epoch": 5.46, + "learning_rate": 5.760145563978953e-06, + "loss": 0.0369, + "step": 5750, + "task_loss": 0.011658702045679092 + }, + { + "epoch": 5.46, + "eval_accuracy": 0.8853211009174312, + "eval_loss": 0.4336629807949066, + "eval_runtime": 17.9957, + "eval_samples_per_second": 48.456, + "eval_steps_per_second": 6.057, + "step": 5750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968016268256093, + "compression_loss": 0.0, + "distillation_loss": 0.015128877013921738, + "epoch": 5.46, + "learning_rate": 5.753343582243278e-06, + "loss": 0.014, + "step": 5751, + "task_loss": 0.0033825524151325226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968052137858443, + "compression_loss": 0.0, + "distillation_loss": 0.021273450925946236, + "epoch": 5.46, + "learning_rate": 5.746545096755437e-06, + "loss": 0.024, + "step": 5752, + "task_loss": 0.04804559051990509 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968087980632371, + "compression_loss": 0.0, + "distillation_loss": 0.08617687970399857, + "epoch": 5.46, + "learning_rate": 5.739750108750408e-06, + "loss": 0.085, + "step": 5753, + "task_loss": 0.07489560544490814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968123796587915, + "compression_loss": 0.0, + "distillation_loss": 0.12841589748859406, + "epoch": 5.46, + "learning_rate": 5.732958619462533e-06, + "loss": 0.1363, + "step": 5754, + "task_loss": 0.2073012888431549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968159585735108, + "compression_loss": 0.0, + "distillation_loss": 0.03129749745130539, + "epoch": 5.47, + "learning_rate": 5.72617063012551e-06, + "loss": 0.0372, + "step": 5755, + "task_loss": 0.09041489660739899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796819534808399, + "compression_loss": 0.0, + "distillation_loss": 0.04228182137012482, + "epoch": 5.47, + "learning_rate": 5.719386141972419e-06, + "loss": 0.0393, + "step": 5756, + "task_loss": 0.012629145756363869 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968231083644597, + "compression_loss": 0.0, + "distillation_loss": 0.05197533220052719, + "epoch": 5.47, + "learning_rate": 5.712605156235695e-06, + "loss": 0.0484, + "step": 5757, + "task_loss": 0.016697099432349205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968266792426966, + "compression_loss": 0.0, + "distillation_loss": 0.026457346975803375, + "epoch": 5.47, + "learning_rate": 5.7058276741471236e-06, + "loss": 0.0426, + "step": 5758, + "task_loss": 0.187742680311203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968302474441132, + "compression_loss": 0.0, + "distillation_loss": 0.08368387818336487, + "epoch": 5.47, + "learning_rate": 5.6990536969378865e-06, + "loss": 0.0848, + "step": 5759, + "task_loss": 0.09534468501806259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968338129697136, + "compression_loss": 0.0, + "distillation_loss": 0.036162346601486206, + "epoch": 5.47, + "learning_rate": 5.692283225838493e-06, + "loss": 0.0487, + "step": 5760, + "task_loss": 0.1613989919424057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796837375820501, + "compression_loss": 0.0, + "distillation_loss": 0.05870455503463745, + "epoch": 5.47, + "learning_rate": 5.6855162620788435e-06, + "loss": 0.057, + "step": 5761, + "task_loss": 0.04194345325231552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968409359974794, + "compression_loss": 0.0, + "distillation_loss": 0.07059012353420258, + "epoch": 5.47, + "learning_rate": 5.67875280688818e-06, + "loss": 0.0739, + "step": 5762, + "task_loss": 0.1037897914648056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968444935016523, + "compression_loss": 0.0, + "distillation_loss": 0.015868360176682472, + "epoch": 5.47, + "learning_rate": 5.671992861495126e-06, + "loss": 0.0187, + "step": 5763, + "task_loss": 0.04378681629896164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968480483340236, + "compression_loss": 0.0, + "distillation_loss": 0.02727351151406765, + "epoch": 5.47, + "learning_rate": 5.665236427127654e-06, + "loss": 0.0267, + "step": 5764, + "task_loss": 0.021995794028043747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968516004955968, + "compression_loss": 0.0, + "distillation_loss": 0.04122058302164078, + "epoch": 5.47, + "learning_rate": 5.658483505013096e-06, + "loss": 0.0494, + "step": 5765, + "task_loss": 0.12348385155200958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968551499873756, + "compression_loss": 0.0, + "distillation_loss": 0.10042732208967209, + "epoch": 5.48, + "learning_rate": 5.651734096378164e-06, + "loss": 0.1009, + "step": 5766, + "task_loss": 0.10483403503894806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968586968103637, + "compression_loss": 0.0, + "distillation_loss": 0.062294743955135345, + "epoch": 5.48, + "learning_rate": 5.644988202448917e-06, + "loss": 0.0605, + "step": 5767, + "task_loss": 0.04418262839317322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968622409655649, + "compression_loss": 0.0, + "distillation_loss": 0.12338921427726746, + "epoch": 5.48, + "learning_rate": 5.638245824450777e-06, + "loss": 0.1259, + "step": 5768, + "task_loss": 0.14866846799850464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968657824539827, + "compression_loss": 0.0, + "distillation_loss": 0.041369691491127014, + "epoch": 5.48, + "learning_rate": 5.631506963608518e-06, + "loss": 0.0491, + "step": 5769, + "task_loss": 0.11868831515312195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968693212766209, + "compression_loss": 0.0, + "distillation_loss": 0.07022303342819214, + "epoch": 5.48, + "learning_rate": 5.624771621146313e-06, + "loss": 0.0894, + "step": 5770, + "task_loss": 0.2618139982223511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968728574344831, + "compression_loss": 0.0, + "distillation_loss": 0.024889115244150162, + "epoch": 5.48, + "learning_rate": 5.618039798287652e-06, + "loss": 0.0231, + "step": 5771, + "task_loss": 0.006804602220654488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796876390928573, + "compression_loss": 0.0, + "distillation_loss": 0.04423792287707329, + "epoch": 5.48, + "learning_rate": 5.6113114962554035e-06, + "loss": 0.0483, + "step": 5772, + "task_loss": 0.08468769490718842 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968799217598944, + "compression_loss": 0.0, + "distillation_loss": 0.036272380501031876, + "epoch": 5.48, + "learning_rate": 5.6045867162718e-06, + "loss": 0.0509, + "step": 5773, + "task_loss": 0.18288281559944153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968834499294508, + "compression_loss": 0.0, + "distillation_loss": 0.1168503686785698, + "epoch": 5.48, + "learning_rate": 5.597865459558435e-06, + "loss": 0.1163, + "step": 5774, + "task_loss": 0.11171647161245346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796886975438246, + "compression_loss": 0.0, + "distillation_loss": 0.06244365870952606, + "epoch": 5.48, + "learning_rate": 5.591147727336246e-06, + "loss": 0.0676, + "step": 5775, + "task_loss": 0.11397860199213028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968904982872836, + "compression_loss": 0.0, + "distillation_loss": 0.027232658118009567, + "epoch": 5.49, + "learning_rate": 5.584433520825541e-06, + "loss": 0.0281, + "step": 5776, + "task_loss": 0.03567790612578392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7968940184775675, + "compression_loss": 0.0, + "distillation_loss": 0.08206064254045486, + "epoch": 5.49, + "learning_rate": 5.577722841245995e-06, + "loss": 0.0808, + "step": 5777, + "task_loss": 0.06914728134870529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796897536010101, + "compression_loss": 0.0, + "distillation_loss": 0.06064610183238983, + "epoch": 5.49, + "learning_rate": 5.571015689816639e-06, + "loss": 0.0668, + "step": 5778, + "task_loss": 0.1217413991689682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969010508858883, + "compression_loss": 0.0, + "distillation_loss": 0.045650772750377655, + "epoch": 5.49, + "learning_rate": 5.564312067755856e-06, + "loss": 0.0601, + "step": 5779, + "task_loss": 0.19037003815174103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969045631059325, + "compression_loss": 0.0, + "distillation_loss": 0.07006368786096573, + "epoch": 5.49, + "learning_rate": 5.5576119762813795e-06, + "loss": 0.0666, + "step": 5780, + "task_loss": 0.03572140634059906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969080726712376, + "compression_loss": 0.0, + "distillation_loss": 0.02565773017704487, + "epoch": 5.49, + "learning_rate": 5.550915416610331e-06, + "loss": 0.0336, + "step": 5781, + "task_loss": 0.10518058389425278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969115795828073, + "compression_loss": 0.0, + "distillation_loss": 0.03800772875547409, + "epoch": 5.49, + "learning_rate": 5.544222389959164e-06, + "loss": 0.0355, + "step": 5782, + "task_loss": 0.012635795399546623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969150838416453, + "compression_loss": 0.0, + "distillation_loss": 0.03859638050198555, + "epoch": 5.49, + "learning_rate": 5.537532897543695e-06, + "loss": 0.0359, + "step": 5783, + "task_loss": 0.011166783049702644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969185854487552, + "compression_loss": 0.0, + "distillation_loss": 0.05814467743039131, + "epoch": 5.49, + "learning_rate": 5.530846940579112e-06, + "loss": 0.0551, + "step": 5784, + "task_loss": 0.028073014691472054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969220844051406, + "compression_loss": 0.0, + "distillation_loss": 0.02096547558903694, + "epoch": 5.49, + "learning_rate": 5.524164520279948e-06, + "loss": 0.0194, + "step": 5785, + "task_loss": 0.005785791203379631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969255807118053, + "compression_loss": 0.0, + "distillation_loss": 0.03538232296705246, + "epoch": 5.49, + "learning_rate": 5.5174856378600895e-06, + "loss": 0.0328, + "step": 5786, + "task_loss": 0.00995618849992752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796929074369753, + "compression_loss": 0.0, + "distillation_loss": 0.017818935215473175, + "epoch": 5.5, + "learning_rate": 5.510810294532792e-06, + "loss": 0.0164, + "step": 5787, + "task_loss": 0.004038920626044273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969325653799872, + "compression_loss": 0.0, + "distillation_loss": 0.0632765144109726, + "epoch": 5.5, + "learning_rate": 5.504138491510674e-06, + "loss": 0.067, + "step": 5788, + "task_loss": 0.1003769114613533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796936053743512, + "compression_loss": 0.0, + "distillation_loss": 0.03711196035146713, + "epoch": 5.5, + "learning_rate": 5.497470230005691e-06, + "loss": 0.044, + "step": 5789, + "task_loss": 0.10593129694461823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969395394613306, + "compression_loss": 0.0, + "distillation_loss": 0.07631154358386993, + "epoch": 5.5, + "learning_rate": 5.490805511229158e-06, + "loss": 0.0861, + "step": 5790, + "task_loss": 0.173743337392807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969430225344468, + "compression_loss": 0.0, + "distillation_loss": 0.03230639174580574, + "epoch": 5.5, + "learning_rate": 5.484144336391769e-06, + "loss": 0.0373, + "step": 5791, + "task_loss": 0.08267946541309357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969465029638645, + "compression_loss": 0.0, + "distillation_loss": 0.031054720282554626, + "epoch": 5.5, + "learning_rate": 5.477486706703553e-06, + "loss": 0.0289, + "step": 5792, + "task_loss": 0.009296312928199768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969499807505873, + "compression_loss": 0.0, + "distillation_loss": 0.09885191917419434, + "epoch": 5.5, + "learning_rate": 5.470832623373889e-06, + "loss": 0.1043, + "step": 5793, + "task_loss": 0.1528811752796173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969534558956187, + "compression_loss": 0.0, + "distillation_loss": 0.0922151654958725, + "epoch": 5.5, + "learning_rate": 5.464182087611538e-06, + "loss": 0.0877, + "step": 5794, + "task_loss": 0.047085706144571304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969569283999626, + "compression_loss": 0.0, + "distillation_loss": 0.037871234118938446, + "epoch": 5.5, + "learning_rate": 5.457535100624592e-06, + "loss": 0.0475, + "step": 5795, + "task_loss": 0.133943110704422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969603982646226, + "compression_loss": 0.0, + "distillation_loss": 0.1495521366596222, + "epoch": 5.5, + "learning_rate": 5.450891663620519e-06, + "loss": 0.1464, + "step": 5796, + "task_loss": 0.11768987029790878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969638654906023, + "compression_loss": 0.0, + "distillation_loss": 0.03055592253804207, + "epoch": 5.51, + "learning_rate": 5.444251777806117e-06, + "loss": 0.0286, + "step": 5797, + "task_loss": 0.011272316798567772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969673300789055, + "compression_loss": 0.0, + "distillation_loss": 0.1179354190826416, + "epoch": 5.51, + "learning_rate": 5.43761544438757e-06, + "loss": 0.1202, + "step": 5798, + "task_loss": 0.14022500813007355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969707920305359, + "compression_loss": 0.0, + "distillation_loss": 0.02442678064107895, + "epoch": 5.51, + "learning_rate": 5.4309826645703886e-06, + "loss": 0.0225, + "step": 5799, + "task_loss": 0.0055536795407533646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.796974251346497, + "compression_loss": 0.0, + "distillation_loss": 0.10044455528259277, + "epoch": 5.51, + "learning_rate": 5.424353439559446e-06, + "loss": 0.1063, + "step": 5800, + "task_loss": 0.15924617648124695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969777080277928, + "compression_loss": 0.0, + "distillation_loss": 0.028251107782125473, + "epoch": 5.51, + "learning_rate": 5.417727770558984e-06, + "loss": 0.0266, + "step": 5801, + "task_loss": 0.012232955545186996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969811620754266, + "compression_loss": 0.0, + "distillation_loss": 0.02132299914956093, + "epoch": 5.51, + "learning_rate": 5.4111056587725836e-06, + "loss": 0.0201, + "step": 5802, + "task_loss": 0.009178368374705315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969846134904024, + "compression_loss": 0.0, + "distillation_loss": 0.041062891483306885, + "epoch": 5.51, + "learning_rate": 5.404487105403172e-06, + "loss": 0.0377, + "step": 5803, + "task_loss": 0.00785096362233162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969880622737237, + "compression_loss": 0.0, + "distillation_loss": 0.027310824021697044, + "epoch": 5.51, + "learning_rate": 5.397872111653052e-06, + "loss": 0.0253, + "step": 5804, + "task_loss": 0.006779264658689499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969915084263942, + "compression_loss": 0.0, + "distillation_loss": 0.17357215285301208, + "epoch": 5.51, + "learning_rate": 5.3912606787238754e-06, + "loss": 0.1662, + "step": 5805, + "task_loss": 0.09964841604232788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969949519494176, + "compression_loss": 0.0, + "distillation_loss": 0.03900089114904404, + "epoch": 5.51, + "learning_rate": 5.384652807816631e-06, + "loss": 0.0428, + "step": 5806, + "task_loss": 0.07701753824949265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7969983928437976, + "compression_loss": 0.0, + "distillation_loss": 0.026013296097517014, + "epoch": 5.51, + "learning_rate": 5.378048500131669e-06, + "loss": 0.0246, + "step": 5807, + "task_loss": 0.011500442400574684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970018311105379, + "compression_loss": 0.0, + "distillation_loss": 0.020743299275636673, + "epoch": 5.52, + "learning_rate": 5.3714477568687025e-06, + "loss": 0.019, + "step": 5808, + "task_loss": 0.0032861437648534775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970052667506422, + "compression_loss": 0.0, + "distillation_loss": 0.11350180208683014, + "epoch": 5.52, + "learning_rate": 5.3648505792267825e-06, + "loss": 0.1343, + "step": 5809, + "task_loss": 0.3215253949165344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970086997651141, + "compression_loss": 0.0, + "distillation_loss": 0.03125522658228874, + "epoch": 5.52, + "learning_rate": 5.358256968404312e-06, + "loss": 0.0371, + "step": 5810, + "task_loss": 0.08946846425533295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970121301549574, + "compression_loss": 0.0, + "distillation_loss": 0.03678523749113083, + "epoch": 5.52, + "learning_rate": 5.351666925599067e-06, + "loss": 0.0436, + "step": 5811, + "task_loss": 0.10449925065040588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970155579211756, + "compression_loss": 0.0, + "distillation_loss": 0.013445551507174969, + "epoch": 5.52, + "learning_rate": 5.345080452008145e-06, + "loss": 0.0124, + "step": 5812, + "task_loss": 0.002551089972257614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970189830647726, + "compression_loss": 0.0, + "distillation_loss": 0.09279379993677139, + "epoch": 5.52, + "learning_rate": 5.338497548828025e-06, + "loss": 0.0901, + "step": 5813, + "task_loss": 0.06628113985061646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797022405586752, + "compression_loss": 0.0, + "distillation_loss": 0.10782104730606079, + "epoch": 5.52, + "learning_rate": 5.331918217254508e-06, + "loss": 0.1014, + "step": 5814, + "task_loss": 0.043450452387332916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970258254881173, + "compression_loss": 0.0, + "distillation_loss": 0.022084686905145645, + "epoch": 5.52, + "learning_rate": 5.325342458482779e-06, + "loss": 0.0385, + "step": 5815, + "task_loss": 0.18579933047294617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970292427698724, + "compression_loss": 0.0, + "distillation_loss": 0.12914645671844482, + "epoch": 5.52, + "learning_rate": 5.3187702737073435e-06, + "loss": 0.1435, + "step": 5816, + "task_loss": 0.2728707790374756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797032657433021, + "compression_loss": 0.0, + "distillation_loss": 0.03833978995680809, + "epoch": 5.52, + "learning_rate": 5.312201664122068e-06, + "loss": 0.0571, + "step": 5817, + "task_loss": 0.22600466012954712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970360694785666, + "compression_loss": 0.0, + "distillation_loss": 0.02015780098736286, + "epoch": 5.53, + "learning_rate": 5.305636630920186e-06, + "loss": 0.0231, + "step": 5818, + "task_loss": 0.04994331672787666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797039478907513, + "compression_loss": 0.0, + "distillation_loss": 0.12706497311592102, + "epoch": 5.53, + "learning_rate": 5.299075175294258e-06, + "loss": 0.1271, + "step": 5819, + "task_loss": 0.1269303411245346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970428857208639, + "compression_loss": 0.0, + "distillation_loss": 0.03767715394496918, + "epoch": 5.53, + "learning_rate": 5.2925172984361944e-06, + "loss": 0.0446, + "step": 5820, + "task_loss": 0.10687967389822006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797046289919623, + "compression_loss": 0.0, + "distillation_loss": 0.16567179560661316, + "epoch": 5.53, + "learning_rate": 5.2859630015372804e-06, + "loss": 0.1592, + "step": 5821, + "task_loss": 0.10095572471618652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970496915047939, + "compression_loss": 0.0, + "distillation_loss": 0.1395283043384552, + "epoch": 5.53, + "learning_rate": 5.279412285788138e-06, + "loss": 0.1345, + "step": 5822, + "task_loss": 0.08924393355846405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970530904773803, + "compression_loss": 0.0, + "distillation_loss": 0.034282200038433075, + "epoch": 5.53, + "learning_rate": 5.2728651523787285e-06, + "loss": 0.0354, + "step": 5823, + "task_loss": 0.045323435217142105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970564868383858, + "compression_loss": 0.0, + "distillation_loss": 0.045332517474889755, + "epoch": 5.53, + "learning_rate": 5.266321602498361e-06, + "loss": 0.0573, + "step": 5824, + "task_loss": 0.16505207121372223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970598805888143, + "compression_loss": 0.0, + "distillation_loss": 0.036005791276693344, + "epoch": 5.53, + "learning_rate": 5.2597816373357226e-06, + "loss": 0.0365, + "step": 5825, + "task_loss": 0.04140361398458481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970632717296693, + "compression_loss": 0.0, + "distillation_loss": 0.06637313961982727, + "epoch": 5.53, + "learning_rate": 5.25324525807882e-06, + "loss": 0.0766, + "step": 5826, + "task_loss": 0.16905876994132996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970666602619545, + "compression_loss": 0.0, + "distillation_loss": 0.06400737166404724, + "epoch": 5.53, + "learning_rate": 5.246712465915011e-06, + "loss": 0.0715, + "step": 5827, + "task_loss": 0.13885146379470825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970700461866737, + "compression_loss": 0.0, + "distillation_loss": 0.07488502562046051, + "epoch": 5.53, + "learning_rate": 5.240183262031021e-06, + "loss": 0.0757, + "step": 5828, + "task_loss": 0.08303892612457275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970734295048304, + "compression_loss": 0.0, + "distillation_loss": 0.09977598488330841, + "epoch": 5.54, + "learning_rate": 5.233657647612899e-06, + "loss": 0.1021, + "step": 5829, + "task_loss": 0.12288139760494232 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970768102174285, + "compression_loss": 0.0, + "distillation_loss": 0.030375579372048378, + "epoch": 5.54, + "learning_rate": 5.227135623846069e-06, + "loss": 0.0348, + "step": 5830, + "task_loss": 0.07510203868150711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970801883254715, + "compression_loss": 0.0, + "distillation_loss": 0.024371018633246422, + "epoch": 5.54, + "learning_rate": 5.220617191915272e-06, + "loss": 0.0227, + "step": 5831, + "task_loss": 0.008041396737098694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970835638299631, + "compression_loss": 0.0, + "distillation_loss": 0.02607511542737484, + "epoch": 5.54, + "learning_rate": 5.214102353004627e-06, + "loss": 0.0242, + "step": 5832, + "task_loss": 0.0076834335923194885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970869367319072, + "compression_loss": 0.0, + "distillation_loss": 0.13398540019989014, + "epoch": 5.54, + "learning_rate": 5.207591108297582e-06, + "loss": 0.1458, + "step": 5833, + "task_loss": 0.2525257170200348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7970903070323072, + "compression_loss": 0.0, + "distillation_loss": 0.0372280478477478, + "epoch": 5.54, + "learning_rate": 5.201083458976925e-06, + "loss": 0.0347, + "step": 5834, + "task_loss": 0.011982131749391556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797093674732167, + "compression_loss": 0.0, + "distillation_loss": 0.08168113976716995, + "epoch": 5.54, + "learning_rate": 5.194579406224817e-06, + "loss": 0.0761, + "step": 5835, + "task_loss": 0.026058735325932503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79709703983249, + "compression_loss": 0.0, + "distillation_loss": 0.06715555489063263, + "epoch": 5.54, + "learning_rate": 5.188078951222744e-06, + "loss": 0.0648, + "step": 5836, + "task_loss": 0.043633393943309784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971004023342801, + "compression_loss": 0.0, + "distillation_loss": 0.3370836079120636, + "epoch": 5.54, + "learning_rate": 5.181582095151538e-06, + "loss": 0.3367, + "step": 5837, + "task_loss": 0.3332991600036621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971037622385411, + "compression_loss": 0.0, + "distillation_loss": 0.016430668532848358, + "epoch": 5.54, + "learning_rate": 5.175088839191392e-06, + "loss": 0.019, + "step": 5838, + "task_loss": 0.04250016063451767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971071195462764, + "compression_loss": 0.0, + "distillation_loss": 0.029836127534508705, + "epoch": 5.55, + "learning_rate": 5.168599184521841e-06, + "loss": 0.0381, + "step": 5839, + "task_loss": 0.11209283769130707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971104742584899, + "compression_loss": 0.0, + "distillation_loss": 0.0611884780228138, + "epoch": 5.55, + "learning_rate": 5.162113132321758e-06, + "loss": 0.0694, + "step": 5840, + "task_loss": 0.14316709339618683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971138263761851, + "compression_loss": 0.0, + "distillation_loss": 0.06297457218170166, + "epoch": 5.55, + "learning_rate": 5.155630683769358e-06, + "loss": 0.0591, + "step": 5841, + "task_loss": 0.02457287907600403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971171759003658, + "compression_loss": 0.0, + "distillation_loss": 0.04800242558121681, + "epoch": 5.55, + "learning_rate": 5.149151840042224e-06, + "loss": 0.0468, + "step": 5842, + "task_loss": 0.035977013409137726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971205228320356, + "compression_loss": 0.0, + "distillation_loss": 0.09977774322032928, + "epoch": 5.55, + "learning_rate": 5.142676602317259e-06, + "loss": 0.0959, + "step": 5843, + "task_loss": 0.060707636177539825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971238671721983, + "compression_loss": 0.0, + "distillation_loss": 0.04978405311703682, + "epoch": 5.55, + "learning_rate": 5.1362049717707165e-06, + "loss": 0.0504, + "step": 5844, + "task_loss": 0.055560655891895294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971272089218575, + "compression_loss": 0.0, + "distillation_loss": 0.07310109585523605, + "epoch": 5.55, + "learning_rate": 5.129736949578215e-06, + "loss": 0.0786, + "step": 5845, + "task_loss": 0.1284613013267517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797130548082017, + "compression_loss": 0.0, + "distillation_loss": 0.029148953035473824, + "epoch": 5.55, + "learning_rate": 5.123272536914689e-06, + "loss": 0.0419, + "step": 5846, + "task_loss": 0.1563912034034729 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971338846536803, + "compression_loss": 0.0, + "distillation_loss": 0.06112636625766754, + "epoch": 5.55, + "learning_rate": 5.116811734954429e-06, + "loss": 0.0628, + "step": 5847, + "task_loss": 0.07760436087846756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971372186378511, + "compression_loss": 0.0, + "distillation_loss": 0.087627112865448, + "epoch": 5.55, + "learning_rate": 5.1103545448710765e-06, + "loss": 0.0942, + "step": 5848, + "task_loss": 0.15320152044296265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971405500355332, + "compression_loss": 0.0, + "distillation_loss": 0.04075030982494354, + "epoch": 5.55, + "learning_rate": 5.103900967837618e-06, + "loss": 0.0486, + "step": 5849, + "task_loss": 0.11899574100971222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971438788477302, + "compression_loss": 0.0, + "distillation_loss": 0.12920604646205902, + "epoch": 5.56, + "learning_rate": 5.097451005026369e-06, + "loss": 0.1303, + "step": 5850, + "task_loss": 0.13977056741714478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971472050754459, + "compression_loss": 0.0, + "distillation_loss": 0.03929203748703003, + "epoch": 5.56, + "learning_rate": 5.091004657608989e-06, + "loss": 0.0456, + "step": 5851, + "task_loss": 0.10265965014696121 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971505287196838, + "compression_loss": 0.0, + "distillation_loss": 0.029239453375339508, + "epoch": 5.56, + "learning_rate": 5.084561926756501e-06, + "loss": 0.0311, + "step": 5852, + "task_loss": 0.047469861805438995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971538497814477, + "compression_loss": 0.0, + "distillation_loss": 0.014326438307762146, + "epoch": 5.56, + "learning_rate": 5.078122813639255e-06, + "loss": 0.0133, + "step": 5853, + "task_loss": 0.004335761070251465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971571682617412, + "compression_loss": 0.0, + "distillation_loss": 0.06414780765771866, + "epoch": 5.56, + "learning_rate": 5.071687319426946e-06, + "loss": 0.0762, + "step": 5854, + "task_loss": 0.1843860000371933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971604841615682, + "compression_loss": 0.0, + "distillation_loss": 0.033587224781513214, + "epoch": 5.56, + "learning_rate": 5.065255445288594e-06, + "loss": 0.0479, + "step": 5855, + "task_loss": 0.1766654998064041 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971637974819321, + "compression_loss": 0.0, + "distillation_loss": 0.09691178798675537, + "epoch": 5.56, + "learning_rate": 5.058827192392613e-06, + "loss": 0.0928, + "step": 5856, + "task_loss": 0.05549832805991173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971671082238367, + "compression_loss": 0.0, + "distillation_loss": 0.057575710117816925, + "epoch": 5.56, + "learning_rate": 5.05240256190671e-06, + "loss": 0.0648, + "step": 5857, + "task_loss": 0.12961743772029877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971704163882857, + "compression_loss": 0.0, + "distillation_loss": 0.08201977610588074, + "epoch": 5.56, + "learning_rate": 5.045981554997945e-06, + "loss": 0.0782, + "step": 5858, + "task_loss": 0.043463174253702164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971737219762828, + "compression_loss": 0.0, + "distillation_loss": 0.015410145744681358, + "epoch": 5.56, + "learning_rate": 5.039564172832733e-06, + "loss": 0.0147, + "step": 5859, + "task_loss": 0.008038915693759918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971770249888316, + "compression_loss": 0.0, + "distillation_loss": 0.022354498505592346, + "epoch": 5.57, + "learning_rate": 5.0331504165768236e-06, + "loss": 0.0289, + "step": 5860, + "task_loss": 0.08798445761203766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971803254269358, + "compression_loss": 0.0, + "distillation_loss": 0.03102359175682068, + "epoch": 5.57, + "learning_rate": 5.026740287395298e-06, + "loss": 0.0288, + "step": 5861, + "task_loss": 0.00922648049890995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971836232915992, + "compression_loss": 0.0, + "distillation_loss": 0.04426976293325424, + "epoch": 5.57, + "learning_rate": 5.020333786452589e-06, + "loss": 0.0411, + "step": 5862, + "task_loss": 0.012406604364514351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971869185838254, + "compression_loss": 0.0, + "distillation_loss": 0.026221610605716705, + "epoch": 5.57, + "learning_rate": 5.013930914912476e-06, + "loss": 0.0242, + "step": 5863, + "task_loss": 0.00634673610329628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971902113046181, + "compression_loss": 0.0, + "distillation_loss": 0.09950605034828186, + "epoch": 5.57, + "learning_rate": 5.007531673938059e-06, + "loss": 0.1281, + "step": 5864, + "task_loss": 0.38576656579971313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971935014549808, + "compression_loss": 0.0, + "distillation_loss": 0.03954896330833435, + "epoch": 5.57, + "learning_rate": 5.0011360646917996e-06, + "loss": 0.043, + "step": 5865, + "task_loss": 0.07363829016685486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7971967890359175, + "compression_loss": 0.0, + "distillation_loss": 0.25650808215141296, + "epoch": 5.57, + "learning_rate": 4.994744088335496e-06, + "loss": 0.2497, + "step": 5866, + "task_loss": 0.1884750872850418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972000740484316, + "compression_loss": 0.0, + "distillation_loss": 0.022899843752384186, + "epoch": 5.57, + "learning_rate": 4.9883557460302735e-06, + "loss": 0.0278, + "step": 5867, + "task_loss": 0.07162778824567795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797203356493527, + "compression_loss": 0.0, + "distillation_loss": 0.0954747349023819, + "epoch": 5.57, + "learning_rate": 4.9819710389366085e-06, + "loss": 0.091, + "step": 5868, + "task_loss": 0.05076561123132706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972066363722073, + "compression_loss": 0.0, + "distillation_loss": 0.026118488982319832, + "epoch": 5.57, + "learning_rate": 4.975589968214303e-06, + "loss": 0.0272, + "step": 5869, + "task_loss": 0.0369403176009655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972099136854761, + "compression_loss": 0.0, + "distillation_loss": 0.027349824085831642, + "epoch": 5.57, + "learning_rate": 4.969212535022527e-06, + "loss": 0.0296, + "step": 5870, + "task_loss": 0.05007128417491913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972131884343372, + "compression_loss": 0.0, + "distillation_loss": 0.03833641856908798, + "epoch": 5.58, + "learning_rate": 4.962838740519763e-06, + "loss": 0.0398, + "step": 5871, + "task_loss": 0.052747875452041626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972164606197942, + "compression_loss": 0.0, + "distillation_loss": 0.019336001947522163, + "epoch": 5.58, + "learning_rate": 4.956468585863835e-06, + "loss": 0.0207, + "step": 5872, + "task_loss": 0.03255251795053482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972197302428509, + "compression_loss": 0.0, + "distillation_loss": 0.02944711409509182, + "epoch": 5.58, + "learning_rate": 4.950102072211921e-06, + "loss": 0.036, + "step": 5873, + "task_loss": 0.09525460749864578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972229973045108, + "compression_loss": 0.0, + "distillation_loss": 0.01285221055150032, + "epoch": 5.58, + "learning_rate": 4.943739200720532e-06, + "loss": 0.0123, + "step": 5874, + "task_loss": 0.006926748901605606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972262618057777, + "compression_loss": 0.0, + "distillation_loss": 0.12414573132991791, + "epoch": 5.58, + "learning_rate": 4.937379972545508e-06, + "loss": 0.1184, + "step": 5875, + "task_loss": 0.06664574146270752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972295237476553, + "compression_loss": 0.0, + "distillation_loss": 0.08365512639284134, + "epoch": 5.58, + "learning_rate": 4.9310243888420285e-06, + "loss": 0.0858, + "step": 5876, + "task_loss": 0.1054966151714325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972327831311472, + "compression_loss": 0.0, + "distillation_loss": 0.03933534771203995, + "epoch": 5.58, + "learning_rate": 4.9246724507646305e-06, + "loss": 0.0408, + "step": 5877, + "task_loss": 0.0535866804420948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972360399572572, + "compression_loss": 0.0, + "distillation_loss": 0.015180066227912903, + "epoch": 5.58, + "learning_rate": 4.918324159467163e-06, + "loss": 0.0153, + "step": 5878, + "task_loss": 0.016459671780467033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972392942269888, + "compression_loss": 0.0, + "distillation_loss": 0.05211641639471054, + "epoch": 5.58, + "learning_rate": 4.911979516102822e-06, + "loss": 0.0481, + "step": 5879, + "task_loss": 0.012342775240540504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972425459413459, + "compression_loss": 0.0, + "distillation_loss": 0.027883626520633698, + "epoch": 5.58, + "learning_rate": 4.905638521824155e-06, + "loss": 0.0401, + "step": 5880, + "task_loss": 0.1499495804309845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797245795101332, + "compression_loss": 0.0, + "distillation_loss": 0.10857439041137695, + "epoch": 5.58, + "learning_rate": 4.899301177783017e-06, + "loss": 0.1129, + "step": 5881, + "task_loss": 0.15197938680648804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972490417079509, + "compression_loss": 0.0, + "distillation_loss": 0.027678653597831726, + "epoch": 5.59, + "learning_rate": 4.892967485130631e-06, + "loss": 0.0368, + "step": 5882, + "task_loss": 0.11922503262758255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972522857622062, + "compression_loss": 0.0, + "distillation_loss": 0.02238599956035614, + "epoch": 5.59, + "learning_rate": 4.886637445017534e-06, + "loss": 0.0206, + "step": 5883, + "task_loss": 0.0042944010347127914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972555272651016, + "compression_loss": 0.0, + "distillation_loss": 0.04405056685209274, + "epoch": 5.59, + "learning_rate": 4.880311058593617e-06, + "loss": 0.0569, + "step": 5884, + "task_loss": 0.17205776274204254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972587662176409, + "compression_loss": 0.0, + "distillation_loss": 0.038423389196395874, + "epoch": 5.59, + "learning_rate": 4.873988327008094e-06, + "loss": 0.0371, + "step": 5885, + "task_loss": 0.02501399628818035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972620026208276, + "compression_loss": 0.0, + "distillation_loss": 0.041545622050762177, + "epoch": 5.59, + "learning_rate": 4.867669251409512e-06, + "loss": 0.0476, + "step": 5886, + "task_loss": 0.10194090753793716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972652364756655, + "compression_loss": 0.0, + "distillation_loss": 0.018506459891796112, + "epoch": 5.59, + "learning_rate": 4.861353832945778e-06, + "loss": 0.017, + "step": 5887, + "task_loss": 0.003717266023159027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972684677831583, + "compression_loss": 0.0, + "distillation_loss": 0.020966939628124237, + "epoch": 5.59, + "learning_rate": 4.855042072764107e-06, + "loss": 0.0301, + "step": 5888, + "task_loss": 0.11239578574895859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972716965443095, + "compression_loss": 0.0, + "distillation_loss": 0.024587448686361313, + "epoch": 5.59, + "learning_rate": 4.848733972011058e-06, + "loss": 0.0317, + "step": 5889, + "task_loss": 0.09537127614021301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797274922760123, + "compression_loss": 0.0, + "distillation_loss": 0.03539607301354408, + "epoch": 5.59, + "learning_rate": 4.842429531832529e-06, + "loss": 0.0323, + "step": 5890, + "task_loss": 0.004601247608661652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972781464316023, + "compression_loss": 0.0, + "distillation_loss": 0.054549772292375565, + "epoch": 5.59, + "learning_rate": 4.8361287533737674e-06, + "loss": 0.0532, + "step": 5891, + "task_loss": 0.04105132073163986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972813675597513, + "compression_loss": 0.0, + "distillation_loss": 0.02551000751554966, + "epoch": 5.6, + "learning_rate": 4.829831637779322e-06, + "loss": 0.0288, + "step": 5892, + "task_loss": 0.058242082595825195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972845861455735, + "compression_loss": 0.0, + "distillation_loss": 0.025515582412481308, + "epoch": 5.6, + "learning_rate": 4.823538186193097e-06, + "loss": 0.0285, + "step": 5893, + "task_loss": 0.0552404448390007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972878021900727, + "compression_loss": 0.0, + "distillation_loss": 0.027075637131929398, + "epoch": 5.6, + "learning_rate": 4.817248399758337e-06, + "loss": 0.025, + "step": 5894, + "task_loss": 0.006751839071512222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972910156942524, + "compression_loss": 0.0, + "distillation_loss": 0.1096268743276596, + "epoch": 5.6, + "learning_rate": 4.810962279617609e-06, + "loss": 0.1301, + "step": 5895, + "task_loss": 0.31454721093177795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972942266591165, + "compression_loss": 0.0, + "distillation_loss": 0.04903785511851311, + "epoch": 5.6, + "learning_rate": 4.804679826912803e-06, + "loss": 0.0731, + "step": 5896, + "task_loss": 0.2901209592819214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7972974350856685, + "compression_loss": 0.0, + "distillation_loss": 0.0792558565735817, + "epoch": 5.6, + "learning_rate": 4.798401042785177e-06, + "loss": 0.0772, + "step": 5897, + "task_loss": 0.05906623229384422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973006409749123, + "compression_loss": 0.0, + "distillation_loss": 0.05788847804069519, + "epoch": 5.6, + "learning_rate": 4.792125928375296e-06, + "loss": 0.0816, + "step": 5898, + "task_loss": 0.2952689230442047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973038443278514, + "compression_loss": 0.0, + "distillation_loss": 0.02830476313829422, + "epoch": 5.6, + "learning_rate": 4.785854484823052e-06, + "loss": 0.026, + "step": 5899, + "task_loss": 0.00540274940431118 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973070451454894, + "compression_loss": 0.0, + "distillation_loss": 0.018592754378914833, + "epoch": 5.6, + "learning_rate": 4.779586713267695e-06, + "loss": 0.0171, + "step": 5900, + "task_loss": 0.0032580215483903885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973102434288303, + "compression_loss": 0.0, + "distillation_loss": 0.05436631664633751, + "epoch": 5.6, + "learning_rate": 4.7733226148478e-06, + "loss": 0.0595, + "step": 5901, + "task_loss": 0.10618877410888672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973134391788775, + "compression_loss": 0.0, + "distillation_loss": 0.01007351279258728, + "epoch": 5.6, + "learning_rate": 4.767062190701266e-06, + "loss": 0.0095, + "step": 5902, + "task_loss": 0.004046119749546051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973166323966349, + "compression_loss": 0.0, + "distillation_loss": 0.016038384288549423, + "epoch": 5.61, + "learning_rate": 4.760805441965321e-06, + "loss": 0.0263, + "step": 5903, + "task_loss": 0.11890155076980591 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973198230831059, + "compression_loss": 0.0, + "distillation_loss": 0.07718884944915771, + "epoch": 5.61, + "learning_rate": 4.754552369776547e-06, + "loss": 0.0741, + "step": 5904, + "task_loss": 0.046534955501556396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973230112392944, + "compression_loss": 0.0, + "distillation_loss": 0.05940188467502594, + "epoch": 5.61, + "learning_rate": 4.748302975270838e-06, + "loss": 0.062, + "step": 5905, + "task_loss": 0.08516646176576614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973261968662041, + "compression_loss": 0.0, + "distillation_loss": 0.0312301404774189, + "epoch": 5.61, + "learning_rate": 4.7420572595834185e-06, + "loss": 0.0287, + "step": 5906, + "task_loss": 0.006120791658759117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973293799648385, + "compression_loss": 0.0, + "distillation_loss": 0.013178318738937378, + "epoch": 5.61, + "learning_rate": 4.735815223848864e-06, + "loss": 0.0122, + "step": 5907, + "task_loss": 0.003270508721470833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973325605362014, + "compression_loss": 0.0, + "distillation_loss": 0.05070923641324043, + "epoch": 5.61, + "learning_rate": 4.7295768692010715e-06, + "loss": 0.0471, + "step": 5908, + "task_loss": 0.014933045953512192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973357385812966, + "compression_loss": 0.0, + "distillation_loss": 0.030595645308494568, + "epoch": 5.61, + "learning_rate": 4.723342196773267e-06, + "loss": 0.0302, + "step": 5909, + "task_loss": 0.026463741436600685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973389141011274, + "compression_loss": 0.0, + "distillation_loss": 0.03401756286621094, + "epoch": 5.61, + "learning_rate": 4.7171112076979965e-06, + "loss": 0.0317, + "step": 5910, + "task_loss": 0.010913487523794174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797342087096698, + "compression_loss": 0.0, + "distillation_loss": 0.05812390148639679, + "epoch": 5.61, + "learning_rate": 4.710883903107166e-06, + "loss": 0.0634, + "step": 5911, + "task_loss": 0.11132801324129105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973452575690118, + "compression_loss": 0.0, + "distillation_loss": 0.12190339714288712, + "epoch": 5.61, + "learning_rate": 4.70466028413199e-06, + "loss": 0.1407, + "step": 5912, + "task_loss": 0.3099902868270874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973484255190724, + "compression_loss": 0.0, + "distillation_loss": 0.01616412214934826, + "epoch": 5.62, + "learning_rate": 4.6984403519030076e-06, + "loss": 0.0148, + "step": 5913, + "task_loss": 0.002981981262564659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973515909478835, + "compression_loss": 0.0, + "distillation_loss": 0.0333518460392952, + "epoch": 5.62, + "learning_rate": 4.692224107550117e-06, + "loss": 0.0307, + "step": 5914, + "task_loss": 0.007058465853333473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973547538564489, + "compression_loss": 0.0, + "distillation_loss": 0.008787909522652626, + "epoch": 5.62, + "learning_rate": 4.686011552202518e-06, + "loss": 0.0082, + "step": 5915, + "task_loss": 0.003183361142873764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973579142457724, + "compression_loss": 0.0, + "distillation_loss": 0.04432186111807823, + "epoch": 5.62, + "learning_rate": 4.679802686988749e-06, + "loss": 0.0479, + "step": 5916, + "task_loss": 0.0803055539727211 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973610721168575, + "compression_loss": 0.0, + "distillation_loss": 0.018226707354187965, + "epoch": 5.62, + "learning_rate": 4.673597513036684e-06, + "loss": 0.0243, + "step": 5917, + "task_loss": 0.0785667896270752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973642274707078, + "compression_loss": 0.0, + "distillation_loss": 0.033110909163951874, + "epoch": 5.62, + "learning_rate": 4.667396031473534e-06, + "loss": 0.0325, + "step": 5918, + "task_loss": 0.02661288157105446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973673803083271, + "compression_loss": 0.0, + "distillation_loss": 0.06958150863647461, + "epoch": 5.62, + "learning_rate": 4.661198243425813e-06, + "loss": 0.0723, + "step": 5919, + "task_loss": 0.0964449942111969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973705306307192, + "compression_loss": 0.0, + "distillation_loss": 0.04093363136053085, + "epoch": 5.62, + "learning_rate": 4.655004150019379e-06, + "loss": 0.0406, + "step": 5920, + "task_loss": 0.03757309168577194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973736784388875, + "compression_loss": 0.0, + "distillation_loss": 0.0374174565076828, + "epoch": 5.62, + "learning_rate": 4.648813752379433e-06, + "loss": 0.0439, + "step": 5921, + "task_loss": 0.10244208574295044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973768237338359, + "compression_loss": 0.0, + "distillation_loss": 0.017319556325674057, + "epoch": 5.62, + "learning_rate": 4.642627051630477e-06, + "loss": 0.0167, + "step": 5922, + "task_loss": 0.011339414864778519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973799665165681, + "compression_loss": 0.0, + "distillation_loss": 0.028723616153001785, + "epoch": 5.62, + "learning_rate": 4.636444048896355e-06, + "loss": 0.0353, + "step": 5923, + "task_loss": 0.09466679394245148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973831067880875, + "compression_loss": 0.0, + "distillation_loss": 0.08423987776041031, + "epoch": 5.63, + "learning_rate": 4.63026474530025e-06, + "loss": 0.0834, + "step": 5924, + "task_loss": 0.07615604996681213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973862445493982, + "compression_loss": 0.0, + "distillation_loss": 0.026835883036255836, + "epoch": 5.63, + "learning_rate": 4.624089141964649e-06, + "loss": 0.0268, + "step": 5925, + "task_loss": 0.025994790717959404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973893798015036, + "compression_loss": 0.0, + "distillation_loss": 0.05702338367700577, + "epoch": 5.63, + "learning_rate": 4.617917240011394e-06, + "loss": 0.0545, + "step": 5926, + "task_loss": 0.032026879489421844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973925125454074, + "compression_loss": 0.0, + "distillation_loss": 0.026394031941890717, + "epoch": 5.63, + "learning_rate": 4.611749040561625e-06, + "loss": 0.0242, + "step": 5927, + "task_loss": 0.004607599228620529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973956427821134, + "compression_loss": 0.0, + "distillation_loss": 0.1339588314294815, + "epoch": 5.63, + "learning_rate": 4.6055845447358415e-06, + "loss": 0.1297, + "step": 5928, + "task_loss": 0.09128076583147049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7973987705126252, + "compression_loss": 0.0, + "distillation_loss": 0.06346623599529266, + "epoch": 5.63, + "learning_rate": 4.599423753653845e-06, + "loss": 0.0697, + "step": 5929, + "task_loss": 0.12597879767417908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974018957379464, + "compression_loss": 0.0, + "distillation_loss": 0.06505750119686127, + "epoch": 5.63, + "learning_rate": 4.593266668434767e-06, + "loss": 0.0716, + "step": 5930, + "task_loss": 0.13001671433448792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974050184590808, + "compression_loss": 0.0, + "distillation_loss": 0.05653805285692215, + "epoch": 5.63, + "learning_rate": 4.587113290197087e-06, + "loss": 0.063, + "step": 5931, + "task_loss": 0.12097219377756119 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974081386770322, + "compression_loss": 0.0, + "distillation_loss": 0.07613807171583176, + "epoch": 5.63, + "learning_rate": 4.580963620058587e-06, + "loss": 0.0749, + "step": 5932, + "task_loss": 0.06361458450555801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974112563928041, + "compression_loss": 0.0, + "distillation_loss": 0.16531898081302643, + "epoch": 5.63, + "learning_rate": 4.5748176591363795e-06, + "loss": 0.1618, + "step": 5933, + "task_loss": 0.129730224609375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974143716074001, + "compression_loss": 0.0, + "distillation_loss": 0.03830117732286453, + "epoch": 5.64, + "learning_rate": 4.56867540854691e-06, + "loss": 0.0433, + "step": 5934, + "task_loss": 0.088396355509758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974174843218241, + "compression_loss": 0.0, + "distillation_loss": 0.0483672171831131, + "epoch": 5.64, + "learning_rate": 4.562536869405959e-06, + "loss": 0.0454, + "step": 5935, + "task_loss": 0.01907249167561531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974205945370797, + "compression_loss": 0.0, + "distillation_loss": 0.029438257217407227, + "epoch": 5.64, + "learning_rate": 4.556402042828611e-06, + "loss": 0.0342, + "step": 5936, + "task_loss": 0.07672730088233948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974237022541706, + "compression_loss": 0.0, + "distillation_loss": 0.024926815181970596, + "epoch": 5.64, + "learning_rate": 4.550270929929288e-06, + "loss": 0.0329, + "step": 5937, + "task_loss": 0.10498930513858795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974268074741003, + "compression_loss": 0.0, + "distillation_loss": 0.10737212002277374, + "epoch": 5.64, + "learning_rate": 4.54414353182174e-06, + "loss": 0.103, + "step": 5938, + "task_loss": 0.06395427882671356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974299101978728, + "compression_loss": 0.0, + "distillation_loss": 0.10752628743648529, + "epoch": 5.64, + "learning_rate": 4.538019849619035e-06, + "loss": 0.0984, + "step": 5939, + "task_loss": 0.01625998131930828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974330104264915, + "compression_loss": 0.0, + "distillation_loss": 0.033906545490026474, + "epoch": 5.64, + "learning_rate": 4.531899884433574e-06, + "loss": 0.034, + "step": 5940, + "task_loss": 0.035197898745536804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974361081609602, + "compression_loss": 0.0, + "distillation_loss": 0.10264087468385696, + "epoch": 5.64, + "learning_rate": 4.525783637377065e-06, + "loss": 0.0959, + "step": 5941, + "task_loss": 0.03510020300745964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974392034022827, + "compression_loss": 0.0, + "distillation_loss": 0.021447885781526566, + "epoch": 5.64, + "learning_rate": 4.519671109560567e-06, + "loss": 0.0358, + "step": 5942, + "task_loss": 0.16513197124004364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974422961514624, + "compression_loss": 0.0, + "distillation_loss": 0.12002286314964294, + "epoch": 5.64, + "learning_rate": 4.5135623020944485e-06, + "loss": 0.1311, + "step": 5943, + "task_loss": 0.23128020763397217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974453864095034, + "compression_loss": 0.0, + "distillation_loss": 0.04369574412703514, + "epoch": 5.64, + "learning_rate": 4.507457216088396e-06, + "loss": 0.0538, + "step": 5944, + "task_loss": 0.14453351497650146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974484741774089, + "compression_loss": 0.0, + "distillation_loss": 0.08547208458185196, + "epoch": 5.65, + "learning_rate": 4.501355852651443e-06, + "loss": 0.0862, + "step": 5945, + "task_loss": 0.09284761548042297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974515594561828, + "compression_loss": 0.0, + "distillation_loss": 0.053911492228507996, + "epoch": 5.65, + "learning_rate": 4.495258212891918e-06, + "loss": 0.058, + "step": 5946, + "task_loss": 0.09494040161371231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974546422468289, + "compression_loss": 0.0, + "distillation_loss": 0.0476272851228714, + "epoch": 5.65, + "learning_rate": 4.489164297917492e-06, + "loss": 0.0436, + "step": 5947, + "task_loss": 0.00767885148525238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974577225503507, + "compression_loss": 0.0, + "distillation_loss": 0.035985067486763, + "epoch": 5.65, + "learning_rate": 4.483074108835145e-06, + "loss": 0.057, + "step": 5948, + "task_loss": 0.24564029276371002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797460800367752, + "compression_loss": 0.0, + "distillation_loss": 0.09636256098747253, + "epoch": 5.65, + "learning_rate": 4.476987646751205e-06, + "loss": 0.1025, + "step": 5949, + "task_loss": 0.15778429806232452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974638757000365, + "compression_loss": 0.0, + "distillation_loss": 0.07263234257698059, + "epoch": 5.65, + "learning_rate": 4.470904912771298e-06, + "loss": 0.0894, + "step": 5950, + "task_loss": 0.24070273339748383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974669485482078, + "compression_loss": 0.0, + "distillation_loss": 0.024517526850104332, + "epoch": 5.65, + "learning_rate": 4.46482590800037e-06, + "loss": 0.0281, + "step": 5951, + "task_loss": 0.05985688790678978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974700189132695, + "compression_loss": 0.0, + "distillation_loss": 0.06554505228996277, + "epoch": 5.65, + "learning_rate": 4.458750633542727e-06, + "loss": 0.0605, + "step": 5952, + "task_loss": 0.015469128265976906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974730867962254, + "compression_loss": 0.0, + "distillation_loss": 0.022991040721535683, + "epoch": 5.65, + "learning_rate": 4.45267909050196e-06, + "loss": 0.0213, + "step": 5953, + "task_loss": 0.005838258191943169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974761521980792, + "compression_loss": 0.0, + "distillation_loss": 0.08440842479467392, + "epoch": 5.65, + "learning_rate": 4.446611279980992e-06, + "loss": 0.0993, + "step": 5954, + "task_loss": 0.23307430744171143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974792151198347, + "compression_loss": 0.0, + "distillation_loss": 0.04542703554034233, + "epoch": 5.66, + "learning_rate": 4.440547203082065e-06, + "loss": 0.0493, + "step": 5955, + "task_loss": 0.08394621312618256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974822755624952, + "compression_loss": 0.0, + "distillation_loss": 0.02426799014210701, + "epoch": 5.66, + "learning_rate": 4.434486860906761e-06, + "loss": 0.0256, + "step": 5956, + "task_loss": 0.03762784227728844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974853335270646, + "compression_loss": 0.0, + "distillation_loss": 0.02789018675684929, + "epoch": 5.66, + "learning_rate": 4.4284302545559624e-06, + "loss": 0.0261, + "step": 5957, + "task_loss": 0.010452285408973694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974883890145468, + "compression_loss": 0.0, + "distillation_loss": 0.08330793678760529, + "epoch": 5.66, + "learning_rate": 4.422377385129878e-06, + "loss": 0.0919, + "step": 5958, + "task_loss": 0.1694803088903427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974914420259451, + "compression_loss": 0.0, + "distillation_loss": 0.055434923619031906, + "epoch": 5.66, + "learning_rate": 4.416328253728041e-06, + "loss": 0.0574, + "step": 5959, + "task_loss": 0.07511811703443527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974944925622635, + "compression_loss": 0.0, + "distillation_loss": 0.022852642461657524, + "epoch": 5.66, + "learning_rate": 4.410282861449317e-06, + "loss": 0.0288, + "step": 5960, + "task_loss": 0.08201512694358826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7974975406245054, + "compression_loss": 0.0, + "distillation_loss": 0.023079611361026764, + "epoch": 5.66, + "learning_rate": 4.404241209391874e-06, + "loss": 0.0443, + "step": 5961, + "task_loss": 0.23562346398830414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975005862136747, + "compression_loss": 0.0, + "distillation_loss": 0.03505755960941315, + "epoch": 5.66, + "learning_rate": 4.398203298653195e-06, + "loss": 0.0437, + "step": 5962, + "task_loss": 0.12132131308317184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797503629330775, + "compression_loss": 0.0, + "distillation_loss": 0.036127813160419464, + "epoch": 5.66, + "learning_rate": 4.392169130330115e-06, + "loss": 0.0389, + "step": 5963, + "task_loss": 0.06360867619514465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79750666997681, + "compression_loss": 0.0, + "distillation_loss": 0.03919731080532074, + "epoch": 5.66, + "learning_rate": 4.386138705518761e-06, + "loss": 0.0428, + "step": 5964, + "task_loss": 0.07522215694189072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975097081527833, + "compression_loss": 0.0, + "distillation_loss": 0.024520831182599068, + "epoch": 5.66, + "learning_rate": 4.380112025314581e-06, + "loss": 0.0316, + "step": 5965, + "task_loss": 0.09532146900892258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975127438596987, + "compression_loss": 0.0, + "distillation_loss": 0.04087524488568306, + "epoch": 5.67, + "learning_rate": 4.374089090812367e-06, + "loss": 0.0448, + "step": 5966, + "task_loss": 0.0800790786743164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975157770985598, + "compression_loss": 0.0, + "distillation_loss": 0.018016282469034195, + "epoch": 5.67, + "learning_rate": 4.368069903106203e-06, + "loss": 0.0224, + "step": 5967, + "task_loss": 0.06212467700242996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975188078703704, + "compression_loss": 0.0, + "distillation_loss": 0.02099071443080902, + "epoch": 5.67, + "learning_rate": 4.3620544632894996e-06, + "loss": 0.0255, + "step": 5968, + "task_loss": 0.0659455806016922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797521836176134, + "compression_loss": 0.0, + "distillation_loss": 0.03363244980573654, + "epoch": 5.67, + "learning_rate": 4.3560427724549965e-06, + "loss": 0.0392, + "step": 5969, + "task_loss": 0.08905819803476334 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975248620168545, + "compression_loss": 0.0, + "distillation_loss": 0.05440010130405426, + "epoch": 5.67, + "learning_rate": 4.350034831694752e-06, + "loss": 0.0709, + "step": 5970, + "task_loss": 0.21891024708747864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975278853935354, + "compression_loss": 0.0, + "distillation_loss": 0.0742577612400055, + "epoch": 5.67, + "learning_rate": 4.344030642100133e-06, + "loss": 0.0727, + "step": 5971, + "task_loss": 0.05856480449438095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975309063071804, + "compression_loss": 0.0, + "distillation_loss": 0.17016083002090454, + "epoch": 5.67, + "learning_rate": 4.33803020476182e-06, + "loss": 0.1682, + "step": 5972, + "task_loss": 0.15005451440811157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975339247587933, + "compression_loss": 0.0, + "distillation_loss": 0.03252163529396057, + "epoch": 5.67, + "learning_rate": 4.3320335207698376e-06, + "loss": 0.03, + "step": 5973, + "task_loss": 0.007399743422865868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975369407493776, + "compression_loss": 0.0, + "distillation_loss": 0.022539321333169937, + "epoch": 5.67, + "learning_rate": 4.326040591213501e-06, + "loss": 0.0209, + "step": 5974, + "task_loss": 0.006453389301896095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975399542799372, + "compression_loss": 0.0, + "distillation_loss": 0.15156593918800354, + "epoch": 5.67, + "learning_rate": 4.320051417181453e-06, + "loss": 0.1463, + "step": 5975, + "task_loss": 0.09853891283273697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975429653514756, + "compression_loss": 0.0, + "distillation_loss": 0.04325627535581589, + "epoch": 5.68, + "learning_rate": 4.314065999761668e-06, + "loss": 0.0558, + "step": 5976, + "task_loss": 0.1683274209499359 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975459739649966, + "compression_loss": 0.0, + "distillation_loss": 0.019812000915408134, + "epoch": 5.68, + "learning_rate": 4.308084340041413e-06, + "loss": 0.0182, + "step": 5977, + "task_loss": 0.003462914377450943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975489801215038, + "compression_loss": 0.0, + "distillation_loss": 0.12688830494880676, + "epoch": 5.68, + "learning_rate": 4.302106439107298e-06, + "loss": 0.1374, + "step": 5978, + "task_loss": 0.23153865337371826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975519838220009, + "compression_loss": 0.0, + "distillation_loss": 0.0461309477686882, + "epoch": 5.68, + "learning_rate": 4.296132298045222e-06, + "loss": 0.0423, + "step": 5979, + "task_loss": 0.007924774661660194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975549850674917, + "compression_loss": 0.0, + "distillation_loss": 0.08937639743089676, + "epoch": 5.68, + "learning_rate": 4.29016191794043e-06, + "loss": 0.0852, + "step": 5980, + "task_loss": 0.04732952266931534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975579838589797, + "compression_loss": 0.0, + "distillation_loss": 0.017793424427509308, + "epoch": 5.68, + "learning_rate": 4.284195299877469e-06, + "loss": 0.0164, + "step": 5981, + "task_loss": 0.003985332325100899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975609801974687, + "compression_loss": 0.0, + "distillation_loss": 0.04416952282190323, + "epoch": 5.68, + "learning_rate": 4.278232444940192e-06, + "loss": 0.041, + "step": 5982, + "task_loss": 0.01279473677277565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975639740839623, + "compression_loss": 0.0, + "distillation_loss": 0.10886112600564957, + "epoch": 5.68, + "learning_rate": 4.272273354211795e-06, + "loss": 0.1035, + "step": 5983, + "task_loss": 0.05517327040433884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975669655194643, + "compression_loss": 0.0, + "distillation_loss": 0.02088319882750511, + "epoch": 5.68, + "learning_rate": 4.266318028774768e-06, + "loss": 0.0191, + "step": 5984, + "task_loss": 0.00327393040060997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975699545049783, + "compression_loss": 0.0, + "distillation_loss": 0.038873784244060516, + "epoch": 5.68, + "learning_rate": 4.260366469710919e-06, + "loss": 0.0364, + "step": 5985, + "task_loss": 0.013767654076218605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975729410415079, + "compression_loss": 0.0, + "distillation_loss": 0.014923624694347382, + "epoch": 5.68, + "learning_rate": 4.254418678101385e-06, + "loss": 0.0138, + "step": 5986, + "task_loss": 0.0034323427826166153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975759251300569, + "compression_loss": 0.0, + "distillation_loss": 0.011618406511843204, + "epoch": 5.69, + "learning_rate": 4.248474655026618e-06, + "loss": 0.011, + "step": 5987, + "task_loss": 0.005057178437709808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797578906771629, + "compression_loss": 0.0, + "distillation_loss": 0.03404705598950386, + "epoch": 5.69, + "learning_rate": 4.242534401566367e-06, + "loss": 0.052, + "step": 5988, + "task_loss": 0.2138359099626541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975818859672279, + "compression_loss": 0.0, + "distillation_loss": 0.02775607816874981, + "epoch": 5.69, + "learning_rate": 4.236597918799709e-06, + "loss": 0.0253, + "step": 5989, + "task_loss": 0.0035394616425037384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975848627178571, + "compression_loss": 0.0, + "distillation_loss": 0.029923155903816223, + "epoch": 5.69, + "learning_rate": 4.23066520780504e-06, + "loss": 0.0275, + "step": 5990, + "task_loss": 0.005590047687292099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975878370245204, + "compression_loss": 0.0, + "distillation_loss": 0.03592822700738907, + "epoch": 5.69, + "learning_rate": 4.224736269660062e-06, + "loss": 0.0419, + "step": 5991, + "task_loss": 0.09543702751398087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975908088882216, + "compression_loss": 0.0, + "distillation_loss": 0.040161147713661194, + "epoch": 5.69, + "learning_rate": 4.218811105441789e-06, + "loss": 0.0431, + "step": 5992, + "task_loss": 0.06955252587795258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975937783099643, + "compression_loss": 0.0, + "distillation_loss": 0.013323452323675156, + "epoch": 5.69, + "learning_rate": 4.21288971622657e-06, + "loss": 0.0177, + "step": 5993, + "task_loss": 0.056832775473594666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797596745290752, + "compression_loss": 0.0, + "distillation_loss": 0.05237278342247009, + "epoch": 5.69, + "learning_rate": 4.206972103090037e-06, + "loss": 0.0474, + "step": 5994, + "task_loss": 0.0029510650783777237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7975997098315886, + "compression_loss": 0.0, + "distillation_loss": 0.09177492558956146, + "epoch": 5.69, + "learning_rate": 4.201058267107169e-06, + "loss": 0.0933, + "step": 5995, + "task_loss": 0.10701367259025574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976026719334777, + "compression_loss": 0.0, + "distillation_loss": 0.034797269850969315, + "epoch": 5.69, + "learning_rate": 4.195148209352232e-06, + "loss": 0.0439, + "step": 5996, + "task_loss": 0.12537652254104614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976056315974229, + "compression_loss": 0.0, + "distillation_loss": 0.010959111154079437, + "epoch": 5.7, + "learning_rate": 4.189241930898824e-06, + "loss": 0.0103, + "step": 5997, + "task_loss": 0.00464300811290741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976085888244282, + "compression_loss": 0.0, + "distillation_loss": 0.027968231588602066, + "epoch": 5.7, + "learning_rate": 4.183339432819844e-06, + "loss": 0.0294, + "step": 5998, + "task_loss": 0.04234401881694794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976115436154969, + "compression_loss": 0.0, + "distillation_loss": 0.06177544593811035, + "epoch": 5.7, + "learning_rate": 4.177440716187506e-06, + "loss": 0.058, + "step": 5999, + "task_loss": 0.024354927241802216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976144959716329, + "compression_loss": 0.0, + "distillation_loss": 0.024940505623817444, + "epoch": 5.7, + "learning_rate": 4.17154578207335e-06, + "loss": 0.0233, + "step": 6000, + "task_loss": 0.008159097284078598 + }, + { + "epoch": 5.7, + "eval_accuracy": 0.8830275229357798, + "eval_loss": 0.45090019702911377, + "eval_runtime": 18.0915, + "eval_samples_per_second": 48.199, + "eval_steps_per_second": 6.025, + "step": 6000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976174458938398, + "compression_loss": 0.0, + "distillation_loss": 0.0402870811522007, + "epoch": 5.7, + "learning_rate": 4.165654631548213e-06, + "loss": 0.0397, + "step": 6001, + "task_loss": 0.03429171442985535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976203933831213, + "compression_loss": 0.0, + "distillation_loss": 0.01571035385131836, + "epoch": 5.7, + "learning_rate": 4.159767265682243e-06, + "loss": 0.0147, + "step": 6002, + "task_loss": 0.0054051876068115234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976233384404812, + "compression_loss": 0.0, + "distillation_loss": 0.033142976462841034, + "epoch": 5.7, + "learning_rate": 4.153883685544921e-06, + "loss": 0.0392, + "step": 6003, + "task_loss": 0.09410811960697174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976262810669229, + "compression_loss": 0.0, + "distillation_loss": 0.03702503442764282, + "epoch": 5.7, + "learning_rate": 4.1480038922050295e-06, + "loss": 0.0537, + "step": 6004, + "task_loss": 0.20370852947235107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976292212634503, + "compression_loss": 0.0, + "distillation_loss": 0.03896544128656387, + "epoch": 5.7, + "learning_rate": 4.142127886730654e-06, + "loss": 0.0421, + "step": 6005, + "task_loss": 0.0707533061504364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976321590310671, + "compression_loss": 0.0, + "distillation_loss": 0.02081841602921486, + "epoch": 5.7, + "learning_rate": 4.136255670189193e-06, + "loss": 0.0193, + "step": 6006, + "task_loss": 0.005161428824067116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976350943707768, + "compression_loss": 0.0, + "distillation_loss": 0.12577922642230988, + "epoch": 5.7, + "learning_rate": 4.130387243647377e-06, + "loss": 0.1272, + "step": 6007, + "task_loss": 0.1395563781261444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976380272835834, + "compression_loss": 0.0, + "distillation_loss": 0.019099004566669464, + "epoch": 5.71, + "learning_rate": 4.12452260817123e-06, + "loss": 0.0204, + "step": 6008, + "task_loss": 0.03233078494668007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976409577704902, + "compression_loss": 0.0, + "distillation_loss": 0.04859714210033417, + "epoch": 5.71, + "learning_rate": 4.118661764826079e-06, + "loss": 0.051, + "step": 6009, + "task_loss": 0.07280280441045761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976438858325011, + "compression_loss": 0.0, + "distillation_loss": 0.018262770026922226, + "epoch": 5.71, + "learning_rate": 4.112804714676594e-06, + "loss": 0.0169, + "step": 6010, + "task_loss": 0.004157818853855133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976468114706198, + "compression_loss": 0.0, + "distillation_loss": 0.047912146896123886, + "epoch": 5.71, + "learning_rate": 4.106951458786717e-06, + "loss": 0.0471, + "step": 6011, + "task_loss": 0.04020364210009575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976497346858499, + "compression_loss": 0.0, + "distillation_loss": 0.02587919309735298, + "epoch": 5.71, + "learning_rate": 4.101101998219737e-06, + "loss": 0.0394, + "step": 6012, + "task_loss": 0.1613081842660904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976526554791951, + "compression_loss": 0.0, + "distillation_loss": 0.025433223694562912, + "epoch": 5.71, + "learning_rate": 4.095256334038222e-06, + "loss": 0.024, + "step": 6013, + "task_loss": 0.010708844289183617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976555738516591, + "compression_loss": 0.0, + "distillation_loss": 0.025807831436395645, + "epoch": 5.71, + "learning_rate": 4.089414467304078e-06, + "loss": 0.028, + "step": 6014, + "task_loss": 0.0474732369184494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976584898042456, + "compression_loss": 0.0, + "distillation_loss": 0.022514909505844116, + "epoch": 5.71, + "learning_rate": 4.083576399078504e-06, + "loss": 0.0324, + "step": 6015, + "task_loss": 0.12099795788526535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976614033379582, + "compression_loss": 0.0, + "distillation_loss": 0.013821316882967949, + "epoch": 5.71, + "learning_rate": 4.077742130422005e-06, + "loss": 0.0127, + "step": 6016, + "task_loss": 0.002670343965291977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976643144538006, + "compression_loss": 0.0, + "distillation_loss": 0.0412713922560215, + "epoch": 5.71, + "learning_rate": 4.071911662394418e-06, + "loss": 0.0377, + "step": 6017, + "task_loss": 0.005630454048514366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976672231527766, + "compression_loss": 0.0, + "distillation_loss": 0.07258160412311554, + "epoch": 5.72, + "learning_rate": 4.066084996054867e-06, + "loss": 0.0764, + "step": 6018, + "task_loss": 0.11061573028564453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976701294358898, + "compression_loss": 0.0, + "distillation_loss": 0.04702713340520859, + "epoch": 5.72, + "learning_rate": 4.060262132461795e-06, + "loss": 0.0465, + "step": 6019, + "task_loss": 0.041665218770504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976730333041439, + "compression_loss": 0.0, + "distillation_loss": 0.04431241750717163, + "epoch": 5.72, + "learning_rate": 4.054443072672942e-06, + "loss": 0.042, + "step": 6020, + "task_loss": 0.020761430263519287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976759347585425, + "compression_loss": 0.0, + "distillation_loss": 0.02348828688263893, + "epoch": 5.72, + "learning_rate": 4.048627817745393e-06, + "loss": 0.0219, + "step": 6021, + "task_loss": 0.007251596078276634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976788338000894, + "compression_loss": 0.0, + "distillation_loss": 0.015367396175861359, + "epoch": 5.72, + "learning_rate": 4.042816368735502e-06, + "loss": 0.0143, + "step": 6022, + "task_loss": 0.004556819796562195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976817304297882, + "compression_loss": 0.0, + "distillation_loss": 0.08127819746732712, + "epoch": 5.72, + "learning_rate": 4.037008726698943e-06, + "loss": 0.0863, + "step": 6023, + "task_loss": 0.13124635815620422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976846246486426, + "compression_loss": 0.0, + "distillation_loss": 0.010580083355307579, + "epoch": 5.72, + "learning_rate": 4.0312048926907156e-06, + "loss": 0.0099, + "step": 6024, + "task_loss": 0.0037852171808481216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976875164576563, + "compression_loss": 0.0, + "distillation_loss": 0.036560431122779846, + "epoch": 5.72, + "learning_rate": 4.025404867765103e-06, + "loss": 0.0468, + "step": 6025, + "task_loss": 0.13939811289310455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797690405857833, + "compression_loss": 0.0, + "distillation_loss": 0.05065472796559334, + "epoch": 5.72, + "learning_rate": 4.019608652975712e-06, + "loss": 0.0525, + "step": 6026, + "task_loss": 0.06941260397434235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976932928501764, + "compression_loss": 0.0, + "distillation_loss": 0.033366162329912186, + "epoch": 5.72, + "learning_rate": 4.013816249375446e-06, + "loss": 0.0437, + "step": 6027, + "task_loss": 0.13670209050178528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976961774356901, + "compression_loss": 0.0, + "distillation_loss": 0.07585856318473816, + "epoch": 5.72, + "learning_rate": 4.008027658016536e-06, + "loss": 0.0709, + "step": 6028, + "task_loss": 0.026351409032940865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7976990596153778, + "compression_loss": 0.0, + "distillation_loss": 0.15275973081588745, + "epoch": 5.73, + "learning_rate": 4.0022428799504936e-06, + "loss": 0.1562, + "step": 6029, + "task_loss": 0.18706992268562317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977019393902433, + "compression_loss": 0.0, + "distillation_loss": 0.014029610902071, + "epoch": 5.73, + "learning_rate": 3.996461916228159e-06, + "loss": 0.02, + "step": 6030, + "task_loss": 0.07381336390972137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79770481676129, + "compression_loss": 0.0, + "distillation_loss": 0.014353383332490921, + "epoch": 5.73, + "learning_rate": 3.990684767899677e-06, + "loss": 0.0231, + "step": 6031, + "task_loss": 0.10191542655229568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977076917295219, + "compression_loss": 0.0, + "distillation_loss": 0.12925012409687042, + "epoch": 5.73, + "learning_rate": 3.984911436014488e-06, + "loss": 0.1234, + "step": 6032, + "task_loss": 0.07045982778072357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977105642959426, + "compression_loss": 0.0, + "distillation_loss": 0.021065477281808853, + "epoch": 5.73, + "learning_rate": 3.979141921621346e-06, + "loss": 0.0263, + "step": 6033, + "task_loss": 0.07347995042800903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977134344615556, + "compression_loss": 0.0, + "distillation_loss": 0.03473827242851257, + "epoch": 5.73, + "learning_rate": 3.9733762257683076e-06, + "loss": 0.0325, + "step": 6034, + "task_loss": 0.012779684737324715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977163022273648, + "compression_loss": 0.0, + "distillation_loss": 0.01884627155959606, + "epoch": 5.73, + "learning_rate": 3.967614349502749e-06, + "loss": 0.0172, + "step": 6035, + "task_loss": 0.002278236672282219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977191675943738, + "compression_loss": 0.0, + "distillation_loss": 0.021827183663845062, + "epoch": 5.73, + "learning_rate": 3.961856293871336e-06, + "loss": 0.0203, + "step": 6036, + "task_loss": 0.0063978079706430435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977220305635863, + "compression_loss": 0.0, + "distillation_loss": 0.0746820718050003, + "epoch": 5.73, + "learning_rate": 3.956102059920036e-06, + "loss": 0.0857, + "step": 6037, + "task_loss": 0.1851119101047516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977248911360059, + "compression_loss": 0.0, + "distillation_loss": 0.04180888831615448, + "epoch": 5.73, + "learning_rate": 3.9503516486941565e-06, + "loss": 0.039, + "step": 6038, + "task_loss": 0.014088783413171768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977277493126363, + "compression_loss": 0.0, + "distillation_loss": 0.02447015978395939, + "epoch": 5.74, + "learning_rate": 3.944605061238277e-06, + "loss": 0.03, + "step": 6039, + "task_loss": 0.07963259518146515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977306050944813, + "compression_loss": 0.0, + "distillation_loss": 0.023615635931491852, + "epoch": 5.74, + "learning_rate": 3.938862298596291e-06, + "loss": 0.0276, + "step": 6040, + "task_loss": 0.06358890980482101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977334584825445, + "compression_loss": 0.0, + "distillation_loss": 0.03558419644832611, + "epoch": 5.74, + "learning_rate": 3.9331233618113974e-06, + "loss": 0.0409, + "step": 6041, + "task_loss": 0.0888403058052063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977363094778297, + "compression_loss": 0.0, + "distillation_loss": 0.016286537051200867, + "epoch": 5.74, + "learning_rate": 3.9273882519261066e-06, + "loss": 0.0152, + "step": 6042, + "task_loss": 0.005049385130405426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977391580813403, + "compression_loss": 0.0, + "distillation_loss": 0.03099142014980316, + "epoch": 5.74, + "learning_rate": 3.921656969982229e-06, + "loss": 0.04, + "step": 6043, + "task_loss": 0.12095896154642105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977420042940803, + "compression_loss": 0.0, + "distillation_loss": 0.09785278886556625, + "epoch": 5.74, + "learning_rate": 3.9159295170208695e-06, + "loss": 0.1007, + "step": 6044, + "task_loss": 0.126520037651062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797744848117053, + "compression_loss": 0.0, + "distillation_loss": 0.015856163576245308, + "epoch": 5.74, + "learning_rate": 3.910205894082461e-06, + "loss": 0.0301, + "step": 6045, + "task_loss": 0.15833263099193573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977476895512625, + "compression_loss": 0.0, + "distillation_loss": 0.06277845799922943, + "epoch": 5.74, + "learning_rate": 3.904486102206717e-06, + "loss": 0.0659, + "step": 6046, + "task_loss": 0.09369910508394241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977505285977123, + "compression_loss": 0.0, + "distillation_loss": 0.05015091598033905, + "epoch": 5.74, + "learning_rate": 3.898770142432676e-06, + "loss": 0.0622, + "step": 6047, + "task_loss": 0.1709066480398178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797753365257406, + "compression_loss": 0.0, + "distillation_loss": 0.02492024190723896, + "epoch": 5.74, + "learning_rate": 3.893058015798656e-06, + "loss": 0.0323, + "step": 6048, + "task_loss": 0.09882645308971405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977561995313475, + "compression_loss": 0.0, + "distillation_loss": 0.017794858664274216, + "epoch": 5.74, + "learning_rate": 3.887349723342304e-06, + "loss": 0.0305, + "step": 6049, + "task_loss": 0.14453651010990143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977590314205403, + "compression_loss": 0.0, + "distillation_loss": 0.023934083059430122, + "epoch": 5.75, + "learning_rate": 3.881645266100556e-06, + "loss": 0.0297, + "step": 6050, + "task_loss": 0.08163676410913467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977618609259881, + "compression_loss": 0.0, + "distillation_loss": 0.04733239859342575, + "epoch": 5.75, + "learning_rate": 3.875944645109647e-06, + "loss": 0.0493, + "step": 6051, + "task_loss": 0.06739036738872528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977646880486946, + "compression_loss": 0.0, + "distillation_loss": 0.07677607238292694, + "epoch": 5.75, + "learning_rate": 3.8702478614051355e-06, + "loss": 0.0831, + "step": 6052, + "task_loss": 0.14044952392578125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977675127896635, + "compression_loss": 0.0, + "distillation_loss": 0.08341547846794128, + "epoch": 5.75, + "learning_rate": 3.864554916021859e-06, + "loss": 0.08, + "step": 6053, + "task_loss": 0.04903567209839821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977703351498985, + "compression_loss": 0.0, + "distillation_loss": 0.04993259161710739, + "epoch": 5.75, + "learning_rate": 3.8588658099939665e-06, + "loss": 0.0565, + "step": 6054, + "task_loss": 0.11562397330999374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977731551304031, + "compression_loss": 0.0, + "distillation_loss": 0.019889146089553833, + "epoch": 5.75, + "learning_rate": 3.853180544354915e-06, + "loss": 0.0268, + "step": 6055, + "task_loss": 0.0885215774178505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977759727321814, + "compression_loss": 0.0, + "distillation_loss": 0.028516318649053574, + "epoch": 5.75, + "learning_rate": 3.847499120137471e-06, + "loss": 0.0281, + "step": 6056, + "task_loss": 0.024541892111301422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977787879562367, + "compression_loss": 0.0, + "distillation_loss": 0.01963622495532036, + "epoch": 5.75, + "learning_rate": 3.84182153837368e-06, + "loss": 0.0326, + "step": 6057, + "task_loss": 0.1491500735282898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977816008035727, + "compression_loss": 0.0, + "distillation_loss": 0.04049837589263916, + "epoch": 5.75, + "learning_rate": 3.836147800094903e-06, + "loss": 0.0401, + "step": 6058, + "task_loss": 0.036487508565187454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977844112751933, + "compression_loss": 0.0, + "distillation_loss": 0.059603024274110794, + "epoch": 5.75, + "learning_rate": 3.830477906331806e-06, + "loss": 0.0588, + "step": 6059, + "task_loss": 0.05121609568595886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977872193721021, + "compression_loss": 0.0, + "distillation_loss": 0.03637555241584778, + "epoch": 5.75, + "learning_rate": 3.824811858114355e-06, + "loss": 0.0334, + "step": 6060, + "task_loss": 0.006767544895410538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977900250953027, + "compression_loss": 0.0, + "distillation_loss": 0.023839876055717468, + "epoch": 5.76, + "learning_rate": 3.819149656471802e-06, + "loss": 0.0227, + "step": 6061, + "task_loss": 0.012284144759178162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977928284457988, + "compression_loss": 0.0, + "distillation_loss": 0.012317647226154804, + "epoch": 5.76, + "learning_rate": 3.8134913024327284e-06, + "loss": 0.0171, + "step": 6062, + "task_loss": 0.0602097250521183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977956294245941, + "compression_loss": 0.0, + "distillation_loss": 0.04070582613348961, + "epoch": 5.76, + "learning_rate": 3.8078367970249885e-06, + "loss": 0.0551, + "step": 6063, + "task_loss": 0.18498341739177704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7977984280326924, + "compression_loss": 0.0, + "distillation_loss": 0.06427805125713348, + "epoch": 5.76, + "learning_rate": 3.8021861412757654e-06, + "loss": 0.069, + "step": 6064, + "task_loss": 0.11124201118946075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978012242710972, + "compression_loss": 0.0, + "distillation_loss": 0.13212275505065918, + "epoch": 5.76, + "learning_rate": 3.796539336211513e-06, + "loss": 0.1268, + "step": 6065, + "task_loss": 0.07930596172809601 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978040181408124, + "compression_loss": 0.0, + "distillation_loss": 0.0323861762881279, + "epoch": 5.76, + "learning_rate": 3.790896382858011e-06, + "loss": 0.0336, + "step": 6066, + "task_loss": 0.04439293220639229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978068096428413, + "compression_loss": 0.0, + "distillation_loss": 0.05183859169483185, + "epoch": 5.76, + "learning_rate": 3.785257282240326e-06, + "loss": 0.0565, + "step": 6067, + "task_loss": 0.09798099845647812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797809598778188, + "compression_loss": 0.0, + "distillation_loss": 0.02488609589636326, + "epoch": 5.76, + "learning_rate": 3.779622035382821e-06, + "loss": 0.0376, + "step": 6068, + "task_loss": 0.15173770487308502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978123855478559, + "compression_loss": 0.0, + "distillation_loss": 0.013482201844453812, + "epoch": 5.76, + "learning_rate": 3.7739906433091804e-06, + "loss": 0.0253, + "step": 6069, + "task_loss": 0.1319684386253357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797815169952849, + "compression_loss": 0.0, + "distillation_loss": 0.013858940452337265, + "epoch": 5.76, + "learning_rate": 3.768363107042361e-06, + "loss": 0.0128, + "step": 6070, + "task_loss": 0.0029547102749347687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978179519941705, + "compression_loss": 0.0, + "distillation_loss": 0.030178038403391838, + "epoch": 5.77, + "learning_rate": 3.7627394276046337e-06, + "loss": 0.0281, + "step": 6071, + "task_loss": 0.009721649810671806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978207316728245, + "compression_loss": 0.0, + "distillation_loss": 0.036198072135448456, + "epoch": 5.77, + "learning_rate": 3.7571196060175672e-06, + "loss": 0.0389, + "step": 6072, + "task_loss": 0.06324063241481781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978235089898145, + "compression_loss": 0.0, + "distillation_loss": 0.03386306017637253, + "epoch": 5.77, + "learning_rate": 3.7515036433020343e-06, + "loss": 0.0425, + "step": 6073, + "task_loss": 0.12017367035150528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978262839461442, + "compression_loss": 0.0, + "distillation_loss": 0.03635266050696373, + "epoch": 5.77, + "learning_rate": 3.7458915404782024e-06, + "loss": 0.0361, + "step": 6074, + "task_loss": 0.034307606518268585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978290565428173, + "compression_loss": 0.0, + "distillation_loss": 0.04068465903401375, + "epoch": 5.77, + "learning_rate": 3.740283298565525e-06, + "loss": 0.0485, + "step": 6075, + "task_loss": 0.11905878782272339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978318267808375, + "compression_loss": 0.0, + "distillation_loss": 0.10536225140094757, + "epoch": 5.77, + "learning_rate": 3.734678918582782e-06, + "loss": 0.1052, + "step": 6076, + "task_loss": 0.10353413224220276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978345946612085, + "compression_loss": 0.0, + "distillation_loss": 0.05913068726658821, + "epoch": 5.77, + "learning_rate": 3.729078401548028e-06, + "loss": 0.0667, + "step": 6077, + "task_loss": 0.1346680372953415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978373601849339, + "compression_loss": 0.0, + "distillation_loss": 0.029158534482121468, + "epoch": 5.77, + "learning_rate": 3.723481748478619e-06, + "loss": 0.038, + "step": 6078, + "task_loss": 0.11800020188093185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978401233530175, + "compression_loss": 0.0, + "distillation_loss": 0.023293327540159225, + "epoch": 5.77, + "learning_rate": 3.717888960391222e-06, + "loss": 0.0218, + "step": 6079, + "task_loss": 0.008854774758219719 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978428841664628, + "compression_loss": 0.0, + "distillation_loss": 0.06899020075798035, + "epoch": 5.77, + "learning_rate": 3.7123000383017944e-06, + "loss": 0.063, + "step": 6080, + "task_loss": 0.009547203779220581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978456426262737, + "compression_loss": 0.0, + "distillation_loss": 0.027045968919992447, + "epoch": 5.77, + "learning_rate": 3.7067149832255787e-06, + "loss": 0.0329, + "step": 6081, + "task_loss": 0.08595190942287445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978483987334537, + "compression_loss": 0.0, + "distillation_loss": 0.18197451531887054, + "epoch": 5.78, + "learning_rate": 3.7011337961771394e-06, + "loss": 0.1829, + "step": 6082, + "task_loss": 0.19158345460891724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978511524890065, + "compression_loss": 0.0, + "distillation_loss": 0.104429692029953, + "epoch": 5.78, + "learning_rate": 3.6955564781703257e-06, + "loss": 0.1017, + "step": 6083, + "task_loss": 0.07724487781524658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797853903893936, + "compression_loss": 0.0, + "distillation_loss": 0.05032842978835106, + "epoch": 5.78, + "learning_rate": 3.6899830302182832e-06, + "loss": 0.0553, + "step": 6084, + "task_loss": 0.1004081666469574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978566529492456, + "compression_loss": 0.0, + "distillation_loss": 0.02652582712471485, + "epoch": 5.78, + "learning_rate": 3.6844134533334437e-06, + "loss": 0.0353, + "step": 6085, + "task_loss": 0.11424309015274048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978593996559391, + "compression_loss": 0.0, + "distillation_loss": 0.03817993402481079, + "epoch": 5.78, + "learning_rate": 3.6788477485275667e-06, + "loss": 0.0446, + "step": 6086, + "task_loss": 0.1024344339966774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978621440150202, + "compression_loss": 0.0, + "distillation_loss": 0.04021666944026947, + "epoch": 5.78, + "learning_rate": 3.673285916811678e-06, + "loss": 0.0425, + "step": 6087, + "task_loss": 0.06331319361925125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978648860274926, + "compression_loss": 0.0, + "distillation_loss": 0.021305495873093605, + "epoch": 5.78, + "learning_rate": 3.66772795919611e-06, + "loss": 0.0294, + "step": 6088, + "task_loss": 0.10244767367839813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79786762569436, + "compression_loss": 0.0, + "distillation_loss": 0.05766810476779938, + "epoch": 5.78, + "learning_rate": 3.6621738766904944e-06, + "loss": 0.0829, + "step": 6089, + "task_loss": 0.3096805214881897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797870363016626, + "compression_loss": 0.0, + "distillation_loss": 0.06154272332787514, + "epoch": 5.78, + "learning_rate": 3.6566236703037633e-06, + "loss": 0.0717, + "step": 6090, + "task_loss": 0.1632338911294937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978730979952942, + "compression_loss": 0.0, + "distillation_loss": 0.11370549350976944, + "epoch": 5.78, + "learning_rate": 3.6510773410441316e-06, + "loss": 0.1306, + "step": 6091, + "task_loss": 0.2824677526950836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978758306313685, + "compression_loss": 0.0, + "distillation_loss": 0.030081721022725105, + "epoch": 5.79, + "learning_rate": 3.6455348899191137e-06, + "loss": 0.0389, + "step": 6092, + "task_loss": 0.11794061213731766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978785609258524, + "compression_loss": 0.0, + "distillation_loss": 0.01757514476776123, + "epoch": 5.79, + "learning_rate": 3.6399963179355347e-06, + "loss": 0.0166, + "step": 6093, + "task_loss": 0.007390303537249565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978812888797497, + "compression_loss": 0.0, + "distillation_loss": 0.019966866821050644, + "epoch": 5.79, + "learning_rate": 3.6344616260994946e-06, + "loss": 0.0241, + "step": 6094, + "task_loss": 0.06160247325897217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978840144940641, + "compression_loss": 0.0, + "distillation_loss": 0.03950147703289986, + "epoch": 5.79, + "learning_rate": 3.6289308154163927e-06, + "loss": 0.0375, + "step": 6095, + "task_loss": 0.019919494166970253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978867377697991, + "compression_loss": 0.0, + "distillation_loss": 0.02330363541841507, + "epoch": 5.79, + "learning_rate": 3.6234038868909357e-06, + "loss": 0.0384, + "step": 6096, + "task_loss": 0.17430183291435242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978894587079586, + "compression_loss": 0.0, + "distillation_loss": 0.020831573754549026, + "epoch": 5.79, + "learning_rate": 3.6178808415271158e-06, + "loss": 0.0274, + "step": 6097, + "task_loss": 0.08686920255422592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978921773095462, + "compression_loss": 0.0, + "distillation_loss": 0.06831928342580795, + "epoch": 5.79, + "learning_rate": 3.6123616803282132e-06, + "loss": 0.0654, + "step": 6098, + "task_loss": 0.03933015465736389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978948935755655, + "compression_loss": 0.0, + "distillation_loss": 0.033079955726861954, + "epoch": 5.79, + "learning_rate": 3.6068464042968123e-06, + "loss": 0.0432, + "step": 6099, + "task_loss": 0.1338474154472351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7978976075070203, + "compression_loss": 0.0, + "distillation_loss": 0.01981043629348278, + "epoch": 5.79, + "learning_rate": 3.6013350144348005e-06, + "loss": 0.0185, + "step": 6100, + "task_loss": 0.00669914111495018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979003191049142, + "compression_loss": 0.0, + "distillation_loss": 0.05477694794535637, + "epoch": 5.79, + "learning_rate": 3.595827511743341e-06, + "loss": 0.0621, + "step": 6101, + "task_loss": 0.12840932607650757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797903028370251, + "compression_loss": 0.0, + "distillation_loss": 0.04909781366586685, + "epoch": 5.79, + "learning_rate": 3.5903238972228916e-06, + "loss": 0.0456, + "step": 6102, + "task_loss": 0.014244996011257172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979057353040342, + "compression_loss": 0.0, + "distillation_loss": 0.015033694915473461, + "epoch": 5.8, + "learning_rate": 3.584824171873219e-06, + "loss": 0.014, + "step": 6103, + "task_loss": 0.004645902663469315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979084399072676, + "compression_loss": 0.0, + "distillation_loss": 0.03330419957637787, + "epoch": 5.8, + "learning_rate": 3.579328336693377e-06, + "loss": 0.0474, + "step": 6104, + "task_loss": 0.17442171275615692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979111421809548, + "compression_loss": 0.0, + "distillation_loss": 0.03729955852031708, + "epoch": 5.8, + "learning_rate": 3.5738363926816954e-06, + "loss": 0.0373, + "step": 6105, + "task_loss": 0.03731003403663635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979138421260996, + "compression_loss": 0.0, + "distillation_loss": 0.028155038133263588, + "epoch": 5.8, + "learning_rate": 3.5683483408358307e-06, + "loss": 0.0345, + "step": 6106, + "task_loss": 0.09175101667642593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979165397437057, + "compression_loss": 0.0, + "distillation_loss": 0.02423456311225891, + "epoch": 5.8, + "learning_rate": 3.5628641821527002e-06, + "loss": 0.0265, + "step": 6107, + "task_loss": 0.04681221395730972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979192350347767, + "compression_loss": 0.0, + "distillation_loss": 0.025493551045656204, + "epoch": 5.8, + "learning_rate": 3.55738391762854e-06, + "loss": 0.0236, + "step": 6108, + "task_loss": 0.00649440661072731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979219280003162, + "compression_loss": 0.0, + "distillation_loss": 0.0265052393078804, + "epoch": 5.8, + "learning_rate": 3.551907548258854e-06, + "loss": 0.0333, + "step": 6109, + "task_loss": 0.09423819929361343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797924618641328, + "compression_loss": 0.0, + "distillation_loss": 0.019546575844287872, + "epoch": 5.8, + "learning_rate": 3.5464350750384594e-06, + "loss": 0.0277, + "step": 6110, + "task_loss": 0.10154897719621658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979273069588158, + "compression_loss": 0.0, + "distillation_loss": 0.06870542466640472, + "epoch": 5.8, + "learning_rate": 3.5409664989614593e-06, + "loss": 0.0715, + "step": 6111, + "task_loss": 0.09712858498096466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979299929537832, + "compression_loss": 0.0, + "distillation_loss": 0.019337208941578865, + "epoch": 5.8, + "learning_rate": 3.5355018210212394e-06, + "loss": 0.0179, + "step": 6112, + "task_loss": 0.004613950848579407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979326766272339, + "compression_loss": 0.0, + "distillation_loss": 0.020329779013991356, + "epoch": 5.81, + "learning_rate": 3.530041042210483e-06, + "loss": 0.0191, + "step": 6113, + "task_loss": 0.008153880015015602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979353579801717, + "compression_loss": 0.0, + "distillation_loss": 0.06501001864671707, + "epoch": 5.81, + "learning_rate": 3.524584163521177e-06, + "loss": 0.0633, + "step": 6114, + "task_loss": 0.048086702823638916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979380370136001, + "compression_loss": 0.0, + "distillation_loss": 0.026702333241701126, + "epoch": 5.81, + "learning_rate": 3.5191311859445796e-06, + "loss": 0.0326, + "step": 6115, + "task_loss": 0.08566763997077942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979407137285228, + "compression_loss": 0.0, + "distillation_loss": 0.07300684601068497, + "epoch": 5.81, + "learning_rate": 3.5136821104712557e-06, + "loss": 0.0717, + "step": 6116, + "task_loss": 0.059550218284130096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979433881259437, + "compression_loss": 0.0, + "distillation_loss": 0.023176930844783783, + "epoch": 5.81, + "learning_rate": 3.5082369380910612e-06, + "loss": 0.0228, + "step": 6117, + "task_loss": 0.01990087516605854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979460602068662, + "compression_loss": 0.0, + "distillation_loss": 0.01764390803873539, + "epoch": 5.81, + "learning_rate": 3.5027956697931324e-06, + "loss": 0.0205, + "step": 6118, + "task_loss": 0.04635511338710785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979487299722942, + "compression_loss": 0.0, + "distillation_loss": 0.049246713519096375, + "epoch": 5.81, + "learning_rate": 3.497358306565901e-06, + "loss": 0.055, + "step": 6119, + "task_loss": 0.1070106029510498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979513974232312, + "compression_loss": 0.0, + "distillation_loss": 0.024939920753240585, + "epoch": 5.81, + "learning_rate": 3.491924849397085e-06, + "loss": 0.0399, + "step": 6120, + "task_loss": 0.1745125949382782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797954062560681, + "compression_loss": 0.0, + "distillation_loss": 0.13209006190299988, + "epoch": 5.81, + "learning_rate": 3.486495299273712e-06, + "loss": 0.1453, + "step": 6121, + "task_loss": 0.2643681466579437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979567253856473, + "compression_loss": 0.0, + "distillation_loss": 0.02114560827612877, + "epoch": 5.81, + "learning_rate": 3.481069657182076e-06, + "loss": 0.0195, + "step": 6122, + "task_loss": 0.004614364355802536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979593858991337, + "compression_loss": 0.0, + "distillation_loss": 0.05115702748298645, + "epoch": 5.81, + "learning_rate": 3.4756479241077667e-06, + "loss": 0.0508, + "step": 6123, + "task_loss": 0.04790136218070984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797962044102144, + "compression_loss": 0.0, + "distillation_loss": 0.015820186585187912, + "epoch": 5.82, + "learning_rate": 3.4702301010356762e-06, + "loss": 0.0145, + "step": 6124, + "task_loss": 0.002711120992898941 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979646999956818, + "compression_loss": 0.0, + "distillation_loss": 0.049572013318538666, + "epoch": 5.82, + "learning_rate": 3.464816188949982e-06, + "loss": 0.0551, + "step": 6125, + "task_loss": 0.10523169487714767 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979673535807508, + "compression_loss": 0.0, + "distillation_loss": 0.020622238516807556, + "epoch": 5.82, + "learning_rate": 3.459406188834141e-06, + "loss": 0.0318, + "step": 6126, + "task_loss": 0.13285362720489502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979700048583546, + "compression_loss": 0.0, + "distillation_loss": 0.027139317244291306, + "epoch": 5.82, + "learning_rate": 3.454000101670901e-06, + "loss": 0.0251, + "step": 6127, + "task_loss": 0.006299933418631554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979726538294969, + "compression_loss": 0.0, + "distillation_loss": 0.008850215002894402, + "epoch": 5.82, + "learning_rate": 3.4485979284423155e-06, + "loss": 0.0129, + "step": 6128, + "task_loss": 0.04908328130841255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979753004951815, + "compression_loss": 0.0, + "distillation_loss": 0.031228946521878242, + "epoch": 5.82, + "learning_rate": 3.443199670129707e-06, + "loss": 0.0441, + "step": 6129, + "task_loss": 0.15944904088974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979779448564119, + "compression_loss": 0.0, + "distillation_loss": 0.13153165578842163, + "epoch": 5.82, + "learning_rate": 3.4378053277136946e-06, + "loss": 0.1276, + "step": 6130, + "task_loss": 0.09214229881763458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.797980586914192, + "compression_loss": 0.0, + "distillation_loss": 0.04689065366983414, + "epoch": 5.82, + "learning_rate": 3.4324149021741915e-06, + "loss": 0.0474, + "step": 6131, + "task_loss": 0.05218418687582016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979832266695255, + "compression_loss": 0.0, + "distillation_loss": 0.0955977588891983, + "epoch": 5.82, + "learning_rate": 3.4270283944903946e-06, + "loss": 0.087, + "step": 6132, + "task_loss": 0.009947611019015312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979858641234158, + "compression_loss": 0.0, + "distillation_loss": 0.11793653666973114, + "epoch": 5.82, + "learning_rate": 3.4216458056407775e-06, + "loss": 0.1168, + "step": 6133, + "task_loss": 0.10687369108200073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979884992768668, + "compression_loss": 0.0, + "distillation_loss": 0.016993030905723572, + "epoch": 5.83, + "learning_rate": 3.416267136603124e-06, + "loss": 0.016, + "step": 6134, + "task_loss": 0.006749266758561134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979911321308821, + "compression_loss": 0.0, + "distillation_loss": 0.03799954056739807, + "epoch": 5.83, + "learning_rate": 3.4108923883545004e-06, + "loss": 0.0412, + "step": 6135, + "task_loss": 0.0700988918542862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979937626864654, + "compression_loss": 0.0, + "distillation_loss": 0.02764744684100151, + "epoch": 5.83, + "learning_rate": 3.405521561871247e-06, + "loss": 0.0338, + "step": 6136, + "task_loss": 0.08966173231601715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979963909446204, + "compression_loss": 0.0, + "distillation_loss": 0.017384584993124008, + "epoch": 5.83, + "learning_rate": 3.400154658128998e-06, + "loss": 0.0161, + "step": 6137, + "task_loss": 0.004670457914471626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7979990169063508, + "compression_loss": 0.0, + "distillation_loss": 0.029262276366353035, + "epoch": 5.83, + "learning_rate": 3.3947916781026877e-06, + "loss": 0.0339, + "step": 6138, + "task_loss": 0.07558421790599823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980016405726602, + "compression_loss": 0.0, + "distillation_loss": 0.02195669896900654, + "epoch": 5.83, + "learning_rate": 3.3894326227665196e-06, + "loss": 0.0203, + "step": 6139, + "task_loss": 0.005175255239009857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980042619445524, + "compression_loss": 0.0, + "distillation_loss": 0.017028218135237694, + "epoch": 5.83, + "learning_rate": 3.384077493093987e-06, + "loss": 0.0157, + "step": 6140, + "task_loss": 0.003494156524538994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798006881023031, + "compression_loss": 0.0, + "distillation_loss": 0.01528292428702116, + "epoch": 5.83, + "learning_rate": 3.378726290057882e-06, + "loss": 0.0142, + "step": 6141, + "task_loss": 0.004559960216283798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980094978090997, + "compression_loss": 0.0, + "distillation_loss": 0.014390531927347183, + "epoch": 5.83, + "learning_rate": 3.373379014630279e-06, + "loss": 0.02, + "step": 6142, + "task_loss": 0.07083210349082947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980121123037621, + "compression_loss": 0.0, + "distillation_loss": 0.0584423765540123, + "epoch": 5.83, + "learning_rate": 3.368035667782535e-06, + "loss": 0.0641, + "step": 6143, + "task_loss": 0.11510256677865982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798014724508022, + "compression_loss": 0.0, + "distillation_loss": 0.035305943340063095, + "epoch": 5.83, + "learning_rate": 3.3626962504852865e-06, + "loss": 0.0345, + "step": 6144, + "task_loss": 0.026764407753944397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980173344228831, + "compression_loss": 0.0, + "distillation_loss": 0.026942508295178413, + "epoch": 5.84, + "learning_rate": 3.357360763708478e-06, + "loss": 0.0398, + "step": 6145, + "task_loss": 0.15590748190879822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798019942049349, + "compression_loss": 0.0, + "distillation_loss": 0.10806174576282501, + "epoch": 5.84, + "learning_rate": 3.3520292084213145e-06, + "loss": 0.1066, + "step": 6146, + "task_loss": 0.09339408576488495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980225473884235, + "compression_loss": 0.0, + "distillation_loss": 0.03551897406578064, + "epoch": 5.84, + "learning_rate": 3.3467015855923016e-06, + "loss": 0.0324, + "step": 6147, + "task_loss": 0.004185806959867477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79802515044111, + "compression_loss": 0.0, + "distillation_loss": 0.18967923521995544, + "epoch": 5.84, + "learning_rate": 3.3413778961892324e-06, + "loss": 0.1758, + "step": 6148, + "task_loss": 0.05130823701620102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980277512084125, + "compression_loss": 0.0, + "distillation_loss": 0.0280881579965353, + "epoch": 5.84, + "learning_rate": 3.33605814117918e-06, + "loss": 0.0319, + "step": 6149, + "task_loss": 0.06627509742975235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980303496913346, + "compression_loss": 0.0, + "distillation_loss": 0.014908300712704659, + "epoch": 5.84, + "learning_rate": 3.3307423215284933e-06, + "loss": 0.0137, + "step": 6150, + "task_loss": 0.002931041643023491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980329458908798, + "compression_loss": 0.0, + "distillation_loss": 0.04687492176890373, + "epoch": 5.84, + "learning_rate": 3.325430438202823e-06, + "loss": 0.0432, + "step": 6151, + "task_loss": 0.009853720664978027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798035539808052, + "compression_loss": 0.0, + "distillation_loss": 0.08521527796983719, + "epoch": 5.84, + "learning_rate": 3.320122492167108e-06, + "loss": 0.0926, + "step": 6152, + "task_loss": 0.15886865556240082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980381314438548, + "compression_loss": 0.0, + "distillation_loss": 0.023487182334065437, + "epoch": 5.84, + "learning_rate": 3.3148184843855513e-06, + "loss": 0.0218, + "step": 6153, + "task_loss": 0.0069718193262815475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980407207992919, + "compression_loss": 0.0, + "distillation_loss": 0.008888408541679382, + "epoch": 5.84, + "learning_rate": 3.3095184158216497e-06, + "loss": 0.0085, + "step": 6154, + "task_loss": 0.005421014502644539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980433078753669, + "compression_loss": 0.0, + "distillation_loss": 0.014762504026293755, + "epoch": 5.85, + "learning_rate": 3.304222287438194e-06, + "loss": 0.0202, + "step": 6155, + "task_loss": 0.06863877922296524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980458926730837, + "compression_loss": 0.0, + "distillation_loss": 0.09593481570482254, + "epoch": 5.85, + "learning_rate": 3.2989301001972505e-06, + "loss": 0.1038, + "step": 6156, + "task_loss": 0.17508172988891602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980484751934457, + "compression_loss": 0.0, + "distillation_loss": 0.04558049887418747, + "epoch": 5.85, + "learning_rate": 3.293641855060162e-06, + "loss": 0.0424, + "step": 6157, + "task_loss": 0.013653889298439026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980510554374567, + "compression_loss": 0.0, + "distillation_loss": 0.021937305107712746, + "epoch": 5.85, + "learning_rate": 3.2883575529875726e-06, + "loss": 0.0212, + "step": 6158, + "task_loss": 0.01466263085603714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980536334061205, + "compression_loss": 0.0, + "distillation_loss": 0.02071615308523178, + "epoch": 5.85, + "learning_rate": 3.2830771949393934e-06, + "loss": 0.0194, + "step": 6159, + "task_loss": 0.007383199408650398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980562091004405, + "compression_loss": 0.0, + "distillation_loss": 0.02836211957037449, + "epoch": 5.85, + "learning_rate": 3.27780078187484e-06, + "loss": 0.0417, + "step": 6160, + "task_loss": 0.16145291924476624 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980587825214207, + "compression_loss": 0.0, + "distillation_loss": 0.06770492345094681, + "epoch": 5.85, + "learning_rate": 3.272528314752382e-06, + "loss": 0.0737, + "step": 6161, + "task_loss": 0.12756067514419556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980613536700646, + "compression_loss": 0.0, + "distillation_loss": 0.041868340224027634, + "epoch": 5.85, + "learning_rate": 3.2672597945298006e-06, + "loss": 0.0427, + "step": 6162, + "task_loss": 0.04990936070680618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980639225473759, + "compression_loss": 0.0, + "distillation_loss": 0.012833887711167336, + "epoch": 5.85, + "learning_rate": 3.2619952221641436e-06, + "loss": 0.0204, + "step": 6163, + "task_loss": 0.08840099722146988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980664891543584, + "compression_loss": 0.0, + "distillation_loss": 0.07975931465625763, + "epoch": 5.85, + "learning_rate": 3.2567345986117437e-06, + "loss": 0.0906, + "step": 6164, + "task_loss": 0.188090518116951 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980690534920156, + "compression_loss": 0.0, + "distillation_loss": 0.04811672866344452, + "epoch": 5.85, + "learning_rate": 3.251477924828222e-06, + "loss": 0.0546, + "step": 6165, + "task_loss": 0.11324407160282135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980716155613513, + "compression_loss": 0.0, + "distillation_loss": 0.023004433140158653, + "epoch": 5.86, + "learning_rate": 3.2462252017684797e-06, + "loss": 0.0273, + "step": 6166, + "task_loss": 0.06551149487495422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980741753633692, + "compression_loss": 0.0, + "distillation_loss": 0.039101071655750275, + "epoch": 5.86, + "learning_rate": 3.240976430386691e-06, + "loss": 0.0382, + "step": 6167, + "task_loss": 0.030401092022657394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980767328990729, + "compression_loss": 0.0, + "distillation_loss": 0.03869537636637688, + "epoch": 5.86, + "learning_rate": 3.2357316116363278e-06, + "loss": 0.0375, + "step": 6168, + "task_loss": 0.026733241975307465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798079288169466, + "compression_loss": 0.0, + "distillation_loss": 0.016314208507537842, + "epoch": 5.86, + "learning_rate": 3.2304907464701384e-06, + "loss": 0.0184, + "step": 6169, + "task_loss": 0.037635624408721924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980818411755525, + "compression_loss": 0.0, + "distillation_loss": 0.15428408980369568, + "epoch": 5.86, + "learning_rate": 3.225253835840147e-06, + "loss": 0.1555, + "step": 6170, + "task_loss": 0.16609327495098114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980843919183357, + "compression_loss": 0.0, + "distillation_loss": 0.024836333468556404, + "epoch": 5.86, + "learning_rate": 3.2200208806976634e-06, + "loss": 0.0298, + "step": 6171, + "task_loss": 0.07457348704338074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980869403988196, + "compression_loss": 0.0, + "distillation_loss": 0.025460699573159218, + "epoch": 5.86, + "learning_rate": 3.2147918819932814e-06, + "loss": 0.0235, + "step": 6172, + "task_loss": 0.00573544017970562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980894866180076, + "compression_loss": 0.0, + "distillation_loss": 0.020409489050507545, + "epoch": 5.86, + "learning_rate": 3.209566840676875e-06, + "loss": 0.0189, + "step": 6173, + "task_loss": 0.00523163378238678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980920305769037, + "compression_loss": 0.0, + "distillation_loss": 0.04639172554016113, + "epoch": 5.86, + "learning_rate": 3.204345757697591e-06, + "loss": 0.0423, + "step": 6174, + "task_loss": 0.005238974466919899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980945722765113, + "compression_loss": 0.0, + "distillation_loss": 0.07041463255882263, + "epoch": 5.86, + "learning_rate": 3.199128634003873e-06, + "loss": 0.0673, + "step": 6175, + "task_loss": 0.03931209072470665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980971117178342, + "compression_loss": 0.0, + "distillation_loss": 0.10844946652650833, + "epoch": 5.87, + "learning_rate": 3.1939154705434267e-06, + "loss": 0.1306, + "step": 6176, + "task_loss": 0.32991883158683777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7980996489018761, + "compression_loss": 0.0, + "distillation_loss": 0.0225403793156147, + "epoch": 5.87, + "learning_rate": 3.188706268263261e-06, + "loss": 0.0212, + "step": 6177, + "task_loss": 0.009059395641088486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981021838296406, + "compression_loss": 0.0, + "distillation_loss": 0.0714748352766037, + "epoch": 5.87, + "learning_rate": 3.183501028109642e-06, + "loss": 0.0773, + "step": 6178, + "task_loss": 0.1296701580286026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981047165021314, + "compression_loss": 0.0, + "distillation_loss": 0.043413858860731125, + "epoch": 5.87, + "learning_rate": 3.1782997510281352e-06, + "loss": 0.0535, + "step": 6179, + "task_loss": 0.14417198300361633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981072469203524, + "compression_loss": 0.0, + "distillation_loss": 0.044636182487010956, + "epoch": 5.87, + "learning_rate": 3.173102437963571e-06, + "loss": 0.0413, + "step": 6180, + "task_loss": 0.011053688824176788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798109775085307, + "compression_loss": 0.0, + "distillation_loss": 0.04771566390991211, + "epoch": 5.87, + "learning_rate": 3.1679090898600663e-06, + "loss": 0.0465, + "step": 6181, + "task_loss": 0.035778842866420746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798112300997999, + "compression_loss": 0.0, + "distillation_loss": 0.14893057942390442, + "epoch": 5.87, + "learning_rate": 3.1627197076610243e-06, + "loss": 0.149, + "step": 6182, + "task_loss": 0.14932629466056824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798114824659432, + "compression_loss": 0.0, + "distillation_loss": 0.04840100184082985, + "epoch": 5.87, + "learning_rate": 3.157534292309114e-06, + "loss": 0.0514, + "step": 6183, + "task_loss": 0.07838822156190872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981173460706098, + "compression_loss": 0.0, + "distillation_loss": 0.04644680395722389, + "epoch": 5.87, + "learning_rate": 3.1523528447462925e-06, + "loss": 0.0423, + "step": 6184, + "task_loss": 0.004648592323064804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798119865232536, + "compression_loss": 0.0, + "distillation_loss": 0.12863226234912872, + "epoch": 5.87, + "learning_rate": 3.147175365913793e-06, + "loss": 0.1241, + "step": 6185, + "task_loss": 0.08361361920833588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981223821462143, + "compression_loss": 0.0, + "distillation_loss": 0.10540250688791275, + "epoch": 5.87, + "learning_rate": 3.1420018567521412e-06, + "loss": 0.1011, + "step": 6186, + "task_loss": 0.06201855093240738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981248968126484, + "compression_loss": 0.0, + "distillation_loss": 0.027073774486780167, + "epoch": 5.88, + "learning_rate": 3.136832318201119e-06, + "loss": 0.0254, + "step": 6187, + "task_loss": 0.010364262387156487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981274092328421, + "compression_loss": 0.0, + "distillation_loss": 0.09114031493663788, + "epoch": 5.88, + "learning_rate": 3.1316667511997967e-06, + "loss": 0.0891, + "step": 6188, + "task_loss": 0.07045449316501617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981299194077989, + "compression_loss": 0.0, + "distillation_loss": 0.017900224775075912, + "epoch": 5.88, + "learning_rate": 3.126505156686532e-06, + "loss": 0.0242, + "step": 6189, + "task_loss": 0.08130859583616257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981324273385224, + "compression_loss": 0.0, + "distillation_loss": 0.011481826193630695, + "epoch": 5.88, + "learning_rate": 3.121347535598948e-06, + "loss": 0.0107, + "step": 6190, + "task_loss": 0.003604454919695854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981349330260166, + "compression_loss": 0.0, + "distillation_loss": 0.02443830482661724, + "epoch": 5.88, + "learning_rate": 3.11619388887395e-06, + "loss": 0.0247, + "step": 6191, + "task_loss": 0.026645643636584282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981374364712849, + "compression_loss": 0.0, + "distillation_loss": 0.049847427755594254, + "epoch": 5.88, + "learning_rate": 3.111044217447731e-06, + "loss": 0.0453, + "step": 6192, + "task_loss": 0.004831980913877487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981399376753311, + "compression_loss": 0.0, + "distillation_loss": 0.027359748259186745, + "epoch": 5.88, + "learning_rate": 3.105898522255743e-06, + "loss": 0.0262, + "step": 6193, + "task_loss": 0.015636542811989784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981424366391588, + "compression_loss": 0.0, + "distillation_loss": 0.02219046652317047, + "epoch": 5.88, + "learning_rate": 3.100756804232735e-06, + "loss": 0.0209, + "step": 6194, + "task_loss": 0.008930448442697525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981449333637719, + "compression_loss": 0.0, + "distillation_loss": 0.01434510201215744, + "epoch": 5.88, + "learning_rate": 3.095619064312719e-06, + "loss": 0.0132, + "step": 6195, + "task_loss": 0.0025472547858953476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981474278501739, + "compression_loss": 0.0, + "distillation_loss": 0.014208871871232986, + "epoch": 5.88, + "learning_rate": 3.0904853034289943e-06, + "loss": 0.0166, + "step": 6196, + "task_loss": 0.03827598690986633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981499200993685, + "compression_loss": 0.0, + "distillation_loss": 0.028200728818774223, + "epoch": 5.89, + "learning_rate": 3.085355522514136e-06, + "loss": 0.0336, + "step": 6197, + "task_loss": 0.08197523653507233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981524101123594, + "compression_loss": 0.0, + "distillation_loss": 0.04185293987393379, + "epoch": 5.89, + "learning_rate": 3.0802297224999805e-06, + "loss": 0.0391, + "step": 6198, + "task_loss": 0.014529986307024956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981548978901502, + "compression_loss": 0.0, + "distillation_loss": 0.02289126068353653, + "epoch": 5.89, + "learning_rate": 3.0751079043176673e-06, + "loss": 0.0286, + "step": 6199, + "task_loss": 0.08047693967819214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981573834337446, + "compression_loss": 0.0, + "distillation_loss": 0.014290563762187958, + "epoch": 5.89, + "learning_rate": 3.0699900688975964e-06, + "loss": 0.0134, + "step": 6200, + "task_loss": 0.005466701462864876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981598667441465, + "compression_loss": 0.0, + "distillation_loss": 0.11609383672475815, + "epoch": 5.89, + "learning_rate": 3.064876217169446e-06, + "loss": 0.1114, + "step": 6201, + "task_loss": 0.06948763132095337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981623478223594, + "compression_loss": 0.0, + "distillation_loss": 0.05835815891623497, + "epoch": 5.89, + "learning_rate": 3.05976635006216e-06, + "loss": 0.0661, + "step": 6202, + "task_loss": 0.13556569814682007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798164826669387, + "compression_loss": 0.0, + "distillation_loss": 0.13488264381885529, + "epoch": 5.89, + "learning_rate": 3.0546604685039947e-06, + "loss": 0.1469, + "step": 6203, + "task_loss": 0.25527501106262207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798167303286233, + "compression_loss": 0.0, + "distillation_loss": 0.052829310297966, + "epoch": 5.89, + "learning_rate": 3.0495585734224424e-06, + "loss": 0.059, + "step": 6204, + "task_loss": 0.1148587167263031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981697776739011, + "compression_loss": 0.0, + "distillation_loss": 0.01467627938836813, + "epoch": 5.89, + "learning_rate": 3.044460665744284e-06, + "loss": 0.0242, + "step": 6205, + "task_loss": 0.11007630825042725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981722498333949, + "compression_loss": 0.0, + "distillation_loss": 0.017747841775417328, + "epoch": 5.89, + "learning_rate": 3.0393667463955914e-06, + "loss": 0.0164, + "step": 6206, + "task_loss": 0.004152687266469002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981747197657181, + "compression_loss": 0.0, + "distillation_loss": 0.043606314808130264, + "epoch": 5.89, + "learning_rate": 3.034276816301693e-06, + "loss": 0.0501, + "step": 6207, + "task_loss": 0.10881586372852325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981771874718745, + "compression_loss": 0.0, + "distillation_loss": 0.03161245957016945, + "epoch": 5.9, + "learning_rate": 3.0291908763872e-06, + "loss": 0.0308, + "step": 6208, + "task_loss": 0.023898892104625702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981796529528676, + "compression_loss": 0.0, + "distillation_loss": 0.1580495983362198, + "epoch": 5.9, + "learning_rate": 3.0241089275759886e-06, + "loss": 0.1547, + "step": 6209, + "task_loss": 0.12449827790260315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981821162097013, + "compression_loss": 0.0, + "distillation_loss": 0.017794454470276833, + "epoch": 5.9, + "learning_rate": 3.019030970791237e-06, + "loss": 0.0165, + "step": 6210, + "task_loss": 0.004612291231751442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981845772433791, + "compression_loss": 0.0, + "distillation_loss": 0.06614623963832855, + "epoch": 5.9, + "learning_rate": 3.013957006955362e-06, + "loss": 0.0748, + "step": 6211, + "task_loss": 0.1525183916091919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981870360549047, + "compression_loss": 0.0, + "distillation_loss": 0.10029789060354233, + "epoch": 5.9, + "learning_rate": 3.008887036990085e-06, + "loss": 0.0955, + "step": 6212, + "task_loss": 0.05255453288555145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981894926452819, + "compression_loss": 0.0, + "distillation_loss": 0.08173903077840805, + "epoch": 5.9, + "learning_rate": 3.003821061816392e-06, + "loss": 0.0937, + "step": 6213, + "task_loss": 0.20181068778038025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981919470155143, + "compression_loss": 0.0, + "distillation_loss": 0.02389482408761978, + "epoch": 5.9, + "learning_rate": 2.998759082354538e-06, + "loss": 0.0471, + "step": 6214, + "task_loss": 0.25638294219970703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981943991666055, + "compression_loss": 0.0, + "distillation_loss": 0.060066595673561096, + "epoch": 5.9, + "learning_rate": 2.9937010995240543e-06, + "loss": 0.0666, + "step": 6215, + "task_loss": 0.12505532801151276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981968490995595, + "compression_loss": 0.0, + "distillation_loss": 0.03829333186149597, + "epoch": 5.9, + "learning_rate": 2.988647114243748e-06, + "loss": 0.0417, + "step": 6216, + "task_loss": 0.0723431333899498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7981992968153795, + "compression_loss": 0.0, + "distillation_loss": 0.06221352517604828, + "epoch": 5.9, + "learning_rate": 2.983597127431703e-06, + "loss": 0.0646, + "step": 6217, + "task_loss": 0.08588936179876328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982017423150696, + "compression_loss": 0.0, + "distillation_loss": 0.0338950976729393, + "epoch": 5.91, + "learning_rate": 2.9785511400052727e-06, + "loss": 0.0451, + "step": 6218, + "task_loss": 0.14563745260238647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982041855996332, + "compression_loss": 0.0, + "distillation_loss": 0.10232432931661606, + "epoch": 5.91, + "learning_rate": 2.973509152881079e-06, + "loss": 0.1044, + "step": 6219, + "task_loss": 0.12259991466999054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982066266700742, + "compression_loss": 0.0, + "distillation_loss": 0.043488338589668274, + "epoch": 5.91, + "learning_rate": 2.9684711669750313e-06, + "loss": 0.0447, + "step": 6220, + "task_loss": 0.05578209087252617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982090655273961, + "compression_loss": 0.0, + "distillation_loss": 0.02594229206442833, + "epoch": 5.91, + "learning_rate": 2.963437183202303e-06, + "loss": 0.0315, + "step": 6221, + "task_loss": 0.08114132285118103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982115021726028, + "compression_loss": 0.0, + "distillation_loss": 0.03786736726760864, + "epoch": 5.91, + "learning_rate": 2.9584072024773395e-06, + "loss": 0.0364, + "step": 6222, + "task_loss": 0.022759338840842247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982139366066978, + "compression_loss": 0.0, + "distillation_loss": 0.01899906061589718, + "epoch": 5.91, + "learning_rate": 2.953381225713858e-06, + "loss": 0.0281, + "step": 6223, + "task_loss": 0.110114686191082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982163688306848, + "compression_loss": 0.0, + "distillation_loss": 0.07625134289264679, + "epoch": 5.91, + "learning_rate": 2.948359253824859e-06, + "loss": 0.0865, + "step": 6224, + "task_loss": 0.1790730506181717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982187988455675, + "compression_loss": 0.0, + "distillation_loss": 0.04318426176905632, + "epoch": 5.91, + "learning_rate": 2.9433412877226036e-06, + "loss": 0.0521, + "step": 6225, + "task_loss": 0.1328027844429016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982212266523496, + "compression_loss": 0.0, + "distillation_loss": 0.11644196510314941, + "epoch": 5.91, + "learning_rate": 2.938327328318624e-06, + "loss": 0.1194, + "step": 6226, + "task_loss": 0.14612026512622833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982236522520348, + "compression_loss": 0.0, + "distillation_loss": 0.04620961844921112, + "epoch": 5.91, + "learning_rate": 2.9333173765237394e-06, + "loss": 0.0492, + "step": 6227, + "task_loss": 0.07610806822776794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982260756456268, + "compression_loss": 0.0, + "distillation_loss": 0.02641240693628788, + "epoch": 5.91, + "learning_rate": 2.928311433248024e-06, + "loss": 0.025, + "step": 6228, + "task_loss": 0.012464674189686775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982284968341291, + "compression_loss": 0.0, + "distillation_loss": 0.04388430714607239, + "epoch": 5.92, + "learning_rate": 2.923309499400839e-06, + "loss": 0.0429, + "step": 6229, + "task_loss": 0.03393597900867462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982309158185457, + "compression_loss": 0.0, + "distillation_loss": 0.0793541669845581, + "epoch": 5.92, + "learning_rate": 2.918311575890803e-06, + "loss": 0.0829, + "step": 6230, + "task_loss": 0.11435024440288544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982333325998799, + "compression_loss": 0.0, + "distillation_loss": 0.0714527815580368, + "epoch": 5.92, + "learning_rate": 2.9133176636258196e-06, + "loss": 0.0659, + "step": 6231, + "task_loss": 0.015902981162071228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982357471791358, + "compression_loss": 0.0, + "distillation_loss": 0.07313866913318634, + "epoch": 5.92, + "learning_rate": 2.9083277635130523e-06, + "loss": 0.0717, + "step": 6232, + "task_loss": 0.05869182199239731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982381595573168, + "compression_loss": 0.0, + "distillation_loss": 0.010230525396764278, + "epoch": 5.92, + "learning_rate": 2.903341876458937e-06, + "loss": 0.0096, + "step": 6233, + "task_loss": 0.004364604130387306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982405697354266, + "compression_loss": 0.0, + "distillation_loss": 0.040943779051303864, + "epoch": 5.92, + "learning_rate": 2.8983600033691936e-06, + "loss": 0.046, + "step": 6234, + "task_loss": 0.09106667339801788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798242977714469, + "compression_loss": 0.0, + "distillation_loss": 0.08053984493017197, + "epoch": 5.92, + "learning_rate": 2.8933821451487987e-06, + "loss": 0.0847, + "step": 6235, + "task_loss": 0.12223599851131439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982453834954475, + "compression_loss": 0.0, + "distillation_loss": 0.018065018579363823, + "epoch": 5.92, + "learning_rate": 2.8884083027019976e-06, + "loss": 0.0273, + "step": 6236, + "task_loss": 0.11061768233776093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982477870793661, + "compression_loss": 0.0, + "distillation_loss": 0.049103301018476486, + "epoch": 5.92, + "learning_rate": 2.8834384769323207e-06, + "loss": 0.0446, + "step": 6237, + "task_loss": 0.0044719260185956955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982501884672281, + "compression_loss": 0.0, + "distillation_loss": 0.0168076790869236, + "epoch": 5.92, + "learning_rate": 2.878472668742563e-06, + "loss": 0.0157, + "step": 6238, + "task_loss": 0.005568409338593483 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982525876600374, + "compression_loss": 0.0, + "distillation_loss": 0.14126557111740112, + "epoch": 5.92, + "learning_rate": 2.8735108790347824e-06, + "loss": 0.1368, + "step": 6239, + "task_loss": 0.0968267098069191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982549846587976, + "compression_loss": 0.0, + "distillation_loss": 0.028058458119630814, + "epoch": 5.93, + "learning_rate": 2.8685531087103092e-06, + "loss": 0.0302, + "step": 6240, + "task_loss": 0.049860235303640366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982573794645126, + "compression_loss": 0.0, + "distillation_loss": 0.028030022978782654, + "epoch": 5.93, + "learning_rate": 2.8635993586697553e-06, + "loss": 0.0262, + "step": 6241, + "task_loss": 0.010059693828225136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982597720781858, + "compression_loss": 0.0, + "distillation_loss": 0.10418315976858139, + "epoch": 5.93, + "learning_rate": 2.8586496298129873e-06, + "loss": 0.1219, + "step": 6242, + "task_loss": 0.28089088201522827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982621625008209, + "compression_loss": 0.0, + "distillation_loss": 0.01780703291296959, + "epoch": 5.93, + "learning_rate": 2.8537039230391416e-06, + "loss": 0.0163, + "step": 6243, + "task_loss": 0.003232475370168686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982645507334218, + "compression_loss": 0.0, + "distillation_loss": 0.07454448938369751, + "epoch": 5.93, + "learning_rate": 2.8487622392466438e-06, + "loss": 0.0806, + "step": 6244, + "task_loss": 0.13465920090675354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798266936776992, + "compression_loss": 0.0, + "distillation_loss": 0.025488190352916718, + "epoch": 5.93, + "learning_rate": 2.8438245793331598e-06, + "loss": 0.0241, + "step": 6245, + "task_loss": 0.011609837412834167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982693206325353, + "compression_loss": 0.0, + "distillation_loss": 0.11189248412847519, + "epoch": 5.93, + "learning_rate": 2.8388909441956517e-06, + "loss": 0.1093, + "step": 6246, + "task_loss": 0.08592940866947174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982717023010553, + "compression_loss": 0.0, + "distillation_loss": 0.01671120896935463, + "epoch": 5.93, + "learning_rate": 2.8339613347303283e-06, + "loss": 0.0154, + "step": 6247, + "task_loss": 0.003899950534105301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982740817835556, + "compression_loss": 0.0, + "distillation_loss": 0.03453841432929039, + "epoch": 5.93, + "learning_rate": 2.8290357518326843e-06, + "loss": 0.0494, + "step": 6248, + "task_loss": 0.18323814868927002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982764590810402, + "compression_loss": 0.0, + "distillation_loss": 0.04547137767076492, + "epoch": 5.93, + "learning_rate": 2.824114196397476e-06, + "loss": 0.0428, + "step": 6249, + "task_loss": 0.01905733346939087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982788341945124, + "compression_loss": 0.0, + "distillation_loss": 0.02116478979587555, + "epoch": 5.94, + "learning_rate": 2.819196669318716e-06, + "loss": 0.0196, + "step": 6250, + "task_loss": 0.0053593870252370834 + }, + { + "epoch": 5.94, + "eval_accuracy": 0.8841743119266054, + "eval_loss": 0.43586060404777527, + "eval_runtime": 17.6466, + "eval_samples_per_second": 49.415, + "eval_steps_per_second": 6.177, + "step": 6250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982812071249761, + "compression_loss": 0.0, + "distillation_loss": 0.013482020236551762, + "epoch": 5.94, + "learning_rate": 2.814283171489712e-06, + "loss": 0.0127, + "step": 6251, + "task_loss": 0.0053385235369205475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982835778734348, + "compression_loss": 0.0, + "distillation_loss": 0.08214183896780014, + "epoch": 5.94, + "learning_rate": 2.809373703803017e-06, + "loss": 0.0771, + "step": 6252, + "task_loss": 0.0316874198615551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982859464408925, + "compression_loss": 0.0, + "distillation_loss": 0.03672255948185921, + "epoch": 5.94, + "learning_rate": 2.8044682671504536e-06, + "loss": 0.0444, + "step": 6253, + "task_loss": 0.11331107467412949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982883128283526, + "compression_loss": 0.0, + "distillation_loss": 0.08191946148872375, + "epoch": 5.94, + "learning_rate": 2.7995668624231253e-06, + "loss": 0.0842, + "step": 6254, + "task_loss": 0.10507936030626297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982906770368189, + "compression_loss": 0.0, + "distillation_loss": 0.04623375087976456, + "epoch": 5.94, + "learning_rate": 2.7946694905114006e-06, + "loss": 0.0583, + "step": 6255, + "task_loss": 0.16685155034065247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798293039067295, + "compression_loss": 0.0, + "distillation_loss": 0.012130487710237503, + "epoch": 5.94, + "learning_rate": 2.789776152304904e-06, + "loss": 0.0189, + "step": 6256, + "task_loss": 0.0798635482788086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982953989207847, + "compression_loss": 0.0, + "distillation_loss": 0.02477359026670456, + "epoch": 5.94, + "learning_rate": 2.7848868486925307e-06, + "loss": 0.0283, + "step": 6257, + "task_loss": 0.0600624680519104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7982977565982916, + "compression_loss": 0.0, + "distillation_loss": 0.022501779720187187, + "epoch": 5.94, + "learning_rate": 2.7800015805624526e-06, + "loss": 0.0403, + "step": 6258, + "task_loss": 0.20015643537044525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983001121008194, + "compression_loss": 0.0, + "distillation_loss": 0.011296769604086876, + "epoch": 5.94, + "learning_rate": 2.7751203488021042e-06, + "loss": 0.019, + "step": 6259, + "task_loss": 0.08792918175458908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983024654293718, + "compression_loss": 0.0, + "distillation_loss": 0.030689077451825142, + "epoch": 5.94, + "learning_rate": 2.7702431542981715e-06, + "loss": 0.038, + "step": 6260, + "task_loss": 0.10409321635961533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983048165849524, + "compression_loss": 0.0, + "distillation_loss": 0.07838670909404755, + "epoch": 5.95, + "learning_rate": 2.765369997936637e-06, + "loss": 0.0724, + "step": 6261, + "task_loss": 0.018565570935606956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983071655685651, + "compression_loss": 0.0, + "distillation_loss": 0.02527971751987934, + "epoch": 5.95, + "learning_rate": 2.7605008806027206e-06, + "loss": 0.0422, + "step": 6262, + "task_loss": 0.19478943943977356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983095123812134, + "compression_loss": 0.0, + "distillation_loss": 0.013227314688265324, + "epoch": 5.95, + "learning_rate": 2.7556358031809308e-06, + "loss": 0.0121, + "step": 6263, + "task_loss": 0.002236088737845421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798311857023901, + "compression_loss": 0.0, + "distillation_loss": 0.013676393777132034, + "epoch": 5.95, + "learning_rate": 2.7507747665550227e-06, + "loss": 0.014, + "step": 6264, + "task_loss": 0.01731892116367817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983141994976315, + "compression_loss": 0.0, + "distillation_loss": 0.05857694894075394, + "epoch": 5.95, + "learning_rate": 2.7459177716080363e-06, + "loss": 0.0554, + "step": 6265, + "task_loss": 0.02669447846710682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983165398034088, + "compression_loss": 0.0, + "distillation_loss": 0.041833531111478806, + "epoch": 5.95, + "learning_rate": 2.741064819222264e-06, + "loss": 0.0408, + "step": 6266, + "task_loss": 0.03170401230454445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983188779422363, + "compression_loss": 0.0, + "distillation_loss": 0.024519365280866623, + "epoch": 5.95, + "learning_rate": 2.7362159102792605e-06, + "loss": 0.0226, + "step": 6267, + "task_loss": 0.005678284913301468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798321213915118, + "compression_loss": 0.0, + "distillation_loss": 0.017905663698911667, + "epoch": 5.95, + "learning_rate": 2.7313710456598667e-06, + "loss": 0.0166, + "step": 6268, + "task_loss": 0.004889186471700668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983235477230574, + "compression_loss": 0.0, + "distillation_loss": 0.09741759300231934, + "epoch": 5.95, + "learning_rate": 2.72653022624417e-06, + "loss": 0.0961, + "step": 6269, + "task_loss": 0.08380945026874542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983258793670581, + "compression_loss": 0.0, + "distillation_loss": 0.028389321640133858, + "epoch": 5.95, + "learning_rate": 2.7216934529115223e-06, + "loss": 0.0266, + "step": 6270, + "task_loss": 0.010113891214132309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798328208848124, + "compression_loss": 0.0, + "distillation_loss": 0.012037597596645355, + "epoch": 5.96, + "learning_rate": 2.7168607265405517e-06, + "loss": 0.0111, + "step": 6271, + "task_loss": 0.002512754872441292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983305361672587, + "compression_loss": 0.0, + "distillation_loss": 0.016703153029084206, + "epoch": 5.96, + "learning_rate": 2.712032048009153e-06, + "loss": 0.0232, + "step": 6272, + "task_loss": 0.08192671090364456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983328613254658, + "compression_loss": 0.0, + "distillation_loss": 0.06296996772289276, + "epoch": 5.96, + "learning_rate": 2.707207418194474e-06, + "loss": 0.0594, + "step": 6273, + "task_loss": 0.026859570294618607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798335184323749, + "compression_loss": 0.0, + "distillation_loss": 0.0931231677532196, + "epoch": 5.96, + "learning_rate": 2.702386837972923e-06, + "loss": 0.095, + "step": 6274, + "task_loss": 0.11223654448986053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798337505163112, + "compression_loss": 0.0, + "distillation_loss": 0.08410904556512833, + "epoch": 5.96, + "learning_rate": 2.697570308220196e-06, + "loss": 0.0936, + "step": 6275, + "task_loss": 0.17943312227725983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983398238445586, + "compression_loss": 0.0, + "distillation_loss": 0.021530931815505028, + "epoch": 5.96, + "learning_rate": 2.6927578298112324e-06, + "loss": 0.022, + "step": 6276, + "task_loss": 0.025872595608234406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983421403690923, + "compression_loss": 0.0, + "distillation_loss": 0.1349969059228897, + "epoch": 5.96, + "learning_rate": 2.687949403620235e-06, + "loss": 0.1343, + "step": 6277, + "task_loss": 0.1277485191822052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983444547377169, + "compression_loss": 0.0, + "distillation_loss": 0.047185495495796204, + "epoch": 5.96, + "learning_rate": 2.6831450305206896e-06, + "loss": 0.044, + "step": 6278, + "task_loss": 0.015110282227396965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798346766951436, + "compression_loss": 0.0, + "distillation_loss": 0.028054438531398773, + "epoch": 5.96, + "learning_rate": 2.6783447113853305e-06, + "loss": 0.026, + "step": 6279, + "task_loss": 0.00766693614423275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983490770112535, + "compression_loss": 0.0, + "distillation_loss": 0.01661674678325653, + "epoch": 5.96, + "learning_rate": 2.673548447086152e-06, + "loss": 0.0274, + "step": 6280, + "task_loss": 0.12479189783334732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983513849181727, + "compression_loss": 0.0, + "distillation_loss": 0.012853579595685005, + "epoch": 5.96, + "learning_rate": 2.6687562384944216e-06, + "loss": 0.0221, + "step": 6281, + "task_loss": 0.10521620512008667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983536906731976, + "compression_loss": 0.0, + "distillation_loss": 0.032144561409950256, + "epoch": 5.97, + "learning_rate": 2.6639680864806754e-06, + "loss": 0.0308, + "step": 6282, + "task_loss": 0.01860850676894188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983559942773317, + "compression_loss": 0.0, + "distillation_loss": 0.023536494001746178, + "epoch": 5.97, + "learning_rate": 2.659183991914696e-06, + "loss": 0.027, + "step": 6283, + "task_loss": 0.05826903134584427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983582957315788, + "compression_loss": 0.0, + "distillation_loss": 0.04953496530652046, + "epoch": 5.97, + "learning_rate": 2.6544039556655354e-06, + "loss": 0.054, + "step": 6284, + "task_loss": 0.09401563555002213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983605950369426, + "compression_loss": 0.0, + "distillation_loss": 0.011628339067101479, + "epoch": 5.97, + "learning_rate": 2.6496279786015184e-06, + "loss": 0.0187, + "step": 6285, + "task_loss": 0.08250655233860016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983628921944266, + "compression_loss": 0.0, + "distillation_loss": 0.03543800860643387, + "epoch": 5.97, + "learning_rate": 2.6448560615902202e-06, + "loss": 0.0324, + "step": 6286, + "task_loss": 0.0052569154649972916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983651872050347, + "compression_loss": 0.0, + "distillation_loss": 0.015366720035672188, + "epoch": 5.97, + "learning_rate": 2.640088205498481e-06, + "loss": 0.0165, + "step": 6287, + "task_loss": 0.0262142401188612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983674800697704, + "compression_loss": 0.0, + "distillation_loss": 0.04069266468286514, + "epoch": 5.97, + "learning_rate": 2.635324411192397e-06, + "loss": 0.042, + "step": 6288, + "task_loss": 0.054010894149541855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983697707896376, + "compression_loss": 0.0, + "distillation_loss": 0.08131635189056396, + "epoch": 5.97, + "learning_rate": 2.6305646795373507e-06, + "loss": 0.0923, + "step": 6289, + "task_loss": 0.19130820035934448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983720593656397, + "compression_loss": 0.0, + "distillation_loss": 0.022895563393831253, + "epoch": 5.97, + "learning_rate": 2.6258090113979646e-06, + "loss": 0.0363, + "step": 6290, + "task_loss": 0.15735076367855072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983743457987806, + "compression_loss": 0.0, + "distillation_loss": 0.018831467255949974, + "epoch": 5.97, + "learning_rate": 2.6210574076381224e-06, + "loss": 0.0432, + "step": 6291, + "task_loss": 0.26204773783683777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798376630090064, + "compression_loss": 0.0, + "distillation_loss": 0.0476510114967823, + "epoch": 5.98, + "learning_rate": 2.616309869120984e-06, + "loss": 0.0463, + "step": 6292, + "task_loss": 0.033927060663700104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983789122404933, + "compression_loss": 0.0, + "distillation_loss": 0.04823393374681473, + "epoch": 5.98, + "learning_rate": 2.6115663967089587e-06, + "loss": 0.0457, + "step": 6293, + "task_loss": 0.022519726306200027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983811922510725, + "compression_loss": 0.0, + "distillation_loss": 0.02489767223596573, + "epoch": 5.98, + "learning_rate": 2.6068269912637224e-06, + "loss": 0.0393, + "step": 6294, + "task_loss": 0.16874471306800842 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983834701228052, + "compression_loss": 0.0, + "distillation_loss": 0.01917928084731102, + "epoch": 5.98, + "learning_rate": 2.602091653646205e-06, + "loss": 0.0256, + "step": 6295, + "task_loss": 0.08345439285039902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798385745856695, + "compression_loss": 0.0, + "distillation_loss": 0.011729862540960312, + "epoch": 5.98, + "learning_rate": 2.597360384716613e-06, + "loss": 0.0111, + "step": 6296, + "task_loss": 0.00561935268342495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983880194537456, + "compression_loss": 0.0, + "distillation_loss": 0.08482478559017181, + "epoch": 5.98, + "learning_rate": 2.592633185334395e-06, + "loss": 0.0894, + "step": 6297, + "task_loss": 0.1304791271686554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983902909149607, + "compression_loss": 0.0, + "distillation_loss": 0.0635722205042839, + "epoch": 5.98, + "learning_rate": 2.5879100563582724e-06, + "loss": 0.0594, + "step": 6298, + "task_loss": 0.022111881524324417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798392560241344, + "compression_loss": 0.0, + "distillation_loss": 0.023897292092442513, + "epoch": 5.98, + "learning_rate": 2.5831909986462318e-06, + "loss": 0.0219, + "step": 6299, + "task_loss": 0.004307908937335014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983948274338992, + "compression_loss": 0.0, + "distillation_loss": 0.024204891175031662, + "epoch": 5.98, + "learning_rate": 2.5784760130555057e-06, + "loss": 0.0237, + "step": 6300, + "task_loss": 0.018668916076421738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983970924936299, + "compression_loss": 0.0, + "distillation_loss": 0.018057633191347122, + "epoch": 5.98, + "learning_rate": 2.573765100442596e-06, + "loss": 0.0167, + "step": 6301, + "task_loss": 0.004313705489039421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7983993554215398, + "compression_loss": 0.0, + "distillation_loss": 0.05510625243186951, + "epoch": 5.98, + "learning_rate": 2.569058261663257e-06, + "loss": 0.0541, + "step": 6302, + "task_loss": 0.04473312571644783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984016162186327, + "compression_loss": 0.0, + "distillation_loss": 0.016285603865981102, + "epoch": 5.99, + "learning_rate": 2.5643554975725182e-06, + "loss": 0.0216, + "step": 6303, + "task_loss": 0.0692404955625534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984038748859122, + "compression_loss": 0.0, + "distillation_loss": 0.021900523453950882, + "epoch": 5.99, + "learning_rate": 2.5596568090246548e-06, + "loss": 0.0205, + "step": 6304, + "task_loss": 0.007533866912126541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798406131424382, + "compression_loss": 0.0, + "distillation_loss": 0.0129203200340271, + "epoch": 5.99, + "learning_rate": 2.5549621968732005e-06, + "loss": 0.0236, + "step": 6305, + "task_loss": 0.12000947445631027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984083858350457, + "compression_loss": 0.0, + "distillation_loss": 0.02725234068930149, + "epoch": 5.99, + "learning_rate": 2.5502716619709598e-06, + "loss": 0.0256, + "step": 6306, + "task_loss": 0.01108565554022789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984106381189071, + "compression_loss": 0.0, + "distillation_loss": 0.03494996577501297, + "epoch": 5.99, + "learning_rate": 2.545585205169995e-06, + "loss": 0.0337, + "step": 6307, + "task_loss": 0.022902732715010643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984128882769698, + "compression_loss": 0.0, + "distillation_loss": 0.026792103424668312, + "epoch": 5.99, + "learning_rate": 2.54090282732162e-06, + "loss": 0.0306, + "step": 6308, + "task_loss": 0.06438016891479492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984151363102374, + "compression_loss": 0.0, + "distillation_loss": 0.015818610787391663, + "epoch": 5.99, + "learning_rate": 2.536224529276404e-06, + "loss": 0.0239, + "step": 6309, + "task_loss": 0.09637781977653503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984173822197138, + "compression_loss": 0.0, + "distillation_loss": 0.015181312337517738, + "epoch": 5.99, + "learning_rate": 2.5315503118841956e-06, + "loss": 0.0272, + "step": 6310, + "task_loss": 0.1348828375339508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984196260064026, + "compression_loss": 0.0, + "distillation_loss": 0.021186668425798416, + "epoch": 5.99, + "learning_rate": 2.5268801759940813e-06, + "loss": 0.02, + "step": 6311, + "task_loss": 0.009767893701791763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984218676713074, + "compression_loss": 0.0, + "distillation_loss": 0.058261334896087646, + "epoch": 5.99, + "learning_rate": 2.522214122454411e-06, + "loss": 0.0564, + "step": 6312, + "task_loss": 0.04001392796635628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.798424107215432, + "compression_loss": 0.0, + "distillation_loss": 0.02801283821463585, + "epoch": 6.0, + "learning_rate": 2.5175521521128035e-06, + "loss": 0.0276, + "step": 6313, + "task_loss": 0.023807687684893608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.79842634463978, + "compression_loss": 0.0, + "distillation_loss": 0.166382297873497, + "epoch": 6.0, + "learning_rate": 2.512894265816121e-06, + "loss": 0.165, + "step": 6314, + "task_loss": 0.15275904536247253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984285799453551, + "compression_loss": 0.0, + "distillation_loss": 0.06544499844312668, + "epoch": 6.0, + "learning_rate": 2.5082404644104995e-06, + "loss": 0.0815, + "step": 6315, + "task_loss": 0.22553624212741852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984308131331609, + "compression_loss": 0.0, + "distillation_loss": 0.05184311419725418, + "epoch": 6.0, + "learning_rate": 2.503590748741311e-06, + "loss": 0.0555, + "step": 6316, + "task_loss": 0.0887538492679596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984330442042012, + "compression_loss": 0.0, + "distillation_loss": 0.023621458560228348, + "epoch": 6.0, + "learning_rate": 2.498945119653215e-06, + "loss": 0.0217, + "step": 6317, + "task_loss": 0.00461687333881855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984352731594797, + "compression_loss": 0.0, + "distillation_loss": 0.03740524873137474, + "epoch": 6.0, + "learning_rate": 2.4943035779901008e-06, + "loss": 0.0582, + "step": 6318, + "task_loss": 0.24517786502838135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984375, + "compression_loss": 0.0, + "distillation_loss": 0.23347151279449463, + "epoch": 6.0, + "learning_rate": 2.4896661245951275e-06, + "loss": 0.2215, + "step": 6319, + "task_loss": 0.11372692137956619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984397247267658, + "compression_loss": 0.0, + "distillation_loss": 0.3618473410606384, + "epoch": 6.0, + "learning_rate": 2.485032760310718e-06, + "loss": 0.3399, + "step": 6320, + "task_loss": 0.14205172657966614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984419473407809, + "compression_loss": 0.0, + "distillation_loss": 0.2971758544445038, + "epoch": 6.0, + "learning_rate": 2.4804034859785414e-06, + "loss": 0.2839, + "step": 6321, + "task_loss": 0.16441358625888824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984441678430487, + "compression_loss": 0.0, + "distillation_loss": 0.21649004518985748, + "epoch": 6.0, + "learning_rate": 2.475778302439524e-06, + "loss": 0.2025, + "step": 6322, + "task_loss": 0.07684586942195892 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984463862345732, + "compression_loss": 0.0, + "distillation_loss": 0.23935849964618683, + "epoch": 6.0, + "learning_rate": 2.4711572105338533e-06, + "loss": 0.2226, + "step": 6323, + "task_loss": 0.07181292772293091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984486025163579, + "compression_loss": 0.0, + "distillation_loss": 0.24455556273460388, + "epoch": 6.01, + "learning_rate": 2.4665402111009814e-06, + "loss": 0.2304, + "step": 6324, + "task_loss": 0.10250243544578552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984508166894065, + "compression_loss": 0.0, + "distillation_loss": 0.3571939468383789, + "epoch": 6.01, + "learning_rate": 2.4619273049796e-06, + "loss": 0.3339, + "step": 6325, + "task_loss": 0.12418486177921295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984530287547227, + "compression_loss": 0.0, + "distillation_loss": 0.22672238945960999, + "epoch": 6.01, + "learning_rate": 2.4573184930076655e-06, + "loss": 0.2185, + "step": 6326, + "task_loss": 0.14480708539485931 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984552387133103, + "compression_loss": 0.0, + "distillation_loss": 0.2425723820924759, + "epoch": 6.01, + "learning_rate": 2.4527137760224e-06, + "loss": 0.2238, + "step": 6327, + "task_loss": 0.05520417168736458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984574465661727, + "compression_loss": 0.0, + "distillation_loss": 0.2952827215194702, + "epoch": 6.01, + "learning_rate": 2.4481131548602627e-06, + "loss": 0.2822, + "step": 6328, + "task_loss": 0.16485857963562012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984596523143138, + "compression_loss": 0.0, + "distillation_loss": 0.24353119730949402, + "epoch": 6.01, + "learning_rate": 2.443516630356979e-06, + "loss": 0.2293, + "step": 6329, + "task_loss": 0.10098670423030853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984618559587373, + "compression_loss": 0.0, + "distillation_loss": 0.2624762952327728, + "epoch": 6.01, + "learning_rate": 2.4389242033475366e-06, + "loss": 0.2415, + "step": 6330, + "task_loss": 0.05250587314367294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984640575004468, + "compression_loss": 0.0, + "distillation_loss": 0.20973241329193115, + "epoch": 6.01, + "learning_rate": 2.4343358746661686e-06, + "loss": 0.1955, + "step": 6331, + "task_loss": 0.06739503890275955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984662569404459, + "compression_loss": 0.0, + "distillation_loss": 0.26178908348083496, + "epoch": 6.01, + "learning_rate": 2.4297516451463608e-06, + "loss": 0.2473, + "step": 6332, + "task_loss": 0.11708267778158188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984684542797384, + "compression_loss": 0.0, + "distillation_loss": 0.18669354915618896, + "epoch": 6.01, + "learning_rate": 2.4251715156208665e-06, + "loss": 0.1795, + "step": 6333, + "task_loss": 0.11463765054941177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984706495193281, + "compression_loss": 0.0, + "distillation_loss": 0.305093377828598, + "epoch": 6.02, + "learning_rate": 2.4205954869216922e-06, + "loss": 0.289, + "step": 6334, + "task_loss": 0.14380815625190735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984728426602185, + "compression_loss": 0.0, + "distillation_loss": 0.15407568216323853, + "epoch": 6.02, + "learning_rate": 2.416023559880093e-06, + "loss": 0.1493, + "step": 6335, + "task_loss": 0.1060083881020546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984750337034132, + "compression_loss": 0.0, + "distillation_loss": 0.20316243171691895, + "epoch": 6.02, + "learning_rate": 2.4114557353265733e-06, + "loss": 0.1901, + "step": 6336, + "task_loss": 0.0729086771607399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798477222649916, + "compression_loss": 0.0, + "distillation_loss": 0.26141518354415894, + "epoch": 6.02, + "learning_rate": 2.4068920140909114e-06, + "loss": 0.2509, + "step": 6337, + "task_loss": 0.15607582032680511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984794095007308, + "compression_loss": 0.0, + "distillation_loss": 0.22481253743171692, + "epoch": 6.02, + "learning_rate": 2.4023323970021273e-06, + "loss": 0.2087, + "step": 6338, + "task_loss": 0.06392265856266022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984815942568609, + "compression_loss": 0.0, + "distillation_loss": 0.16369011998176575, + "epoch": 6.02, + "learning_rate": 2.397776884888489e-06, + "loss": 0.1505, + "step": 6339, + "task_loss": 0.03216647729277611 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984837769193103, + "compression_loss": 0.0, + "distillation_loss": 0.22370657324790955, + "epoch": 6.02, + "learning_rate": 2.393225478577532e-06, + "loss": 0.2189, + "step": 6340, + "task_loss": 0.17515668272972107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984859574890824, + "compression_loss": 0.0, + "distillation_loss": 0.22182436287403107, + "epoch": 6.02, + "learning_rate": 2.3886781788960477e-06, + "loss": 0.2059, + "step": 6341, + "task_loss": 0.06226883456110954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798488135967181, + "compression_loss": 0.0, + "distillation_loss": 0.19668897986412048, + "epoch": 6.02, + "learning_rate": 2.384134986670067e-06, + "loss": 0.1828, + "step": 6342, + "task_loss": 0.0580269992351532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984903123546099, + "compression_loss": 0.0, + "distillation_loss": 0.16411647200584412, + "epoch": 6.02, + "learning_rate": 2.3795959027248847e-06, + "loss": 0.1578, + "step": 6343, + "task_loss": 0.10057196021080017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984924866523726, + "compression_loss": 0.0, + "distillation_loss": 0.21145261824131012, + "epoch": 6.02, + "learning_rate": 2.3750609278850505e-06, + "loss": 0.1984, + "step": 6344, + "task_loss": 0.08086170256137848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984946588614729, + "compression_loss": 0.0, + "distillation_loss": 0.12663835287094116, + "epoch": 6.03, + "learning_rate": 2.3705300629743606e-06, + "loss": 0.1277, + "step": 6345, + "task_loss": 0.13693031668663025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984968289829144, + "compression_loss": 0.0, + "distillation_loss": 0.19469054043293, + "epoch": 6.03, + "learning_rate": 2.3660033088158647e-06, + "loss": 0.179, + "step": 6346, + "task_loss": 0.037706997245550156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7984989970177009, + "compression_loss": 0.0, + "distillation_loss": 0.16001702845096588, + "epoch": 6.03, + "learning_rate": 2.36148066623188e-06, + "loss": 0.1503, + "step": 6347, + "task_loss": 0.06241552159190178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798501162966836, + "compression_loss": 0.0, + "distillation_loss": 0.2323281466960907, + "epoch": 6.03, + "learning_rate": 2.356962136043961e-06, + "loss": 0.2186, + "step": 6348, + "task_loss": 0.09503776580095291 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985033268313234, + "compression_loss": 0.0, + "distillation_loss": 0.18243101239204407, + "epoch": 6.03, + "learning_rate": 2.3524477190729144e-06, + "loss": 0.1675, + "step": 6349, + "task_loss": 0.03352750092744827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985054886121667, + "compression_loss": 0.0, + "distillation_loss": 0.24202725291252136, + "epoch": 6.03, + "learning_rate": 2.3479374161388124e-06, + "loss": 0.2333, + "step": 6350, + "task_loss": 0.1544978767633438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985076483103698, + "compression_loss": 0.0, + "distillation_loss": 0.2221883088350296, + "epoch": 6.03, + "learning_rate": 2.343431228060977e-06, + "loss": 0.2053, + "step": 6351, + "task_loss": 0.05342654138803482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985098059269361, + "compression_loss": 0.0, + "distillation_loss": 0.17091111838817596, + "epoch": 6.03, + "learning_rate": 2.3389291556579732e-06, + "loss": 0.1579, + "step": 6352, + "task_loss": 0.0405244342982769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985119614628695, + "compression_loss": 0.0, + "distillation_loss": 0.25911980867385864, + "epoch": 6.03, + "learning_rate": 2.334431199747622e-06, + "loss": 0.2423, + "step": 6353, + "task_loss": 0.09104666113853455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985141149191736, + "compression_loss": 0.0, + "distillation_loss": 0.09606660157442093, + "epoch": 6.03, + "learning_rate": 2.3299373611470053e-06, + "loss": 0.0913, + "step": 6354, + "task_loss": 0.04837304726243019 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798516266296852, + "compression_loss": 0.0, + "distillation_loss": 0.15584638714790344, + "epoch": 6.04, + "learning_rate": 2.3254476406724483e-06, + "loss": 0.1464, + "step": 6355, + "task_loss": 0.06180054694414139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985184155969085, + "compression_loss": 0.0, + "distillation_loss": 0.11610227823257446, + "epoch": 6.04, + "learning_rate": 2.3209620391395236e-06, + "loss": 0.1073, + "step": 6356, + "task_loss": 0.027644779533147812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985205628203469, + "compression_loss": 0.0, + "distillation_loss": 0.16248267889022827, + "epoch": 6.04, + "learning_rate": 2.316480557363071e-06, + "loss": 0.1549, + "step": 6357, + "task_loss": 0.08620963990688324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985227079681706, + "compression_loss": 0.0, + "distillation_loss": 0.14060789346694946, + "epoch": 6.04, + "learning_rate": 2.3120031961571697e-06, + "loss": 0.1372, + "step": 6358, + "task_loss": 0.10676179826259613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985248510413834, + "compression_loss": 0.0, + "distillation_loss": 0.09738312661647797, + "epoch": 6.04, + "learning_rate": 2.3075299563351554e-06, + "loss": 0.0958, + "step": 6359, + "task_loss": 0.08182113617658615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798526992040989, + "compression_loss": 0.0, + "distillation_loss": 0.11283515393733978, + "epoch": 6.04, + "learning_rate": 2.3030608387096087e-06, + "loss": 0.1166, + "step": 6360, + "task_loss": 0.1502501219511032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798529130967991, + "compression_loss": 0.0, + "distillation_loss": 0.1467873603105545, + "epoch": 6.04, + "learning_rate": 2.298595844092377e-06, + "loss": 0.1372, + "step": 6361, + "task_loss": 0.05081057548522949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985312678233933, + "compression_loss": 0.0, + "distillation_loss": 0.14234556257724762, + "epoch": 6.04, + "learning_rate": 2.29413497329454e-06, + "loss": 0.138, + "step": 6362, + "task_loss": 0.09874287247657776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985334026081995, + "compression_loss": 0.0, + "distillation_loss": 0.14752641320228577, + "epoch": 6.04, + "learning_rate": 2.289678227126432e-06, + "loss": 0.1442, + "step": 6363, + "task_loss": 0.11388550698757172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798535535323413, + "compression_loss": 0.0, + "distillation_loss": 0.11193514615297318, + "epoch": 6.04, + "learning_rate": 2.2852256063976537e-06, + "loss": 0.1022, + "step": 6364, + "task_loss": 0.014793351292610168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798537665970038, + "compression_loss": 0.0, + "distillation_loss": 0.08797906339168549, + "epoch": 6.04, + "learning_rate": 2.28077711191704e-06, + "loss": 0.0801, + "step": 6365, + "task_loss": 0.009286869317293167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985397945490776, + "compression_loss": 0.0, + "distillation_loss": 0.09950749576091766, + "epoch": 6.05, + "learning_rate": 2.276332744492676e-06, + "loss": 0.0956, + "step": 6366, + "task_loss": 0.06009618192911148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985419210615359, + "compression_loss": 0.0, + "distillation_loss": 0.1407734453678131, + "epoch": 6.05, + "learning_rate": 2.271892504931905e-06, + "loss": 0.1377, + "step": 6367, + "task_loss": 0.1104123517870903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985440455084164, + "compression_loss": 0.0, + "distillation_loss": 0.12445957213640213, + "epoch": 6.05, + "learning_rate": 2.2674563940413283e-06, + "loss": 0.1133, + "step": 6368, + "task_loss": 0.012716732919216156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798546167890723, + "compression_loss": 0.0, + "distillation_loss": 0.13971540331840515, + "epoch": 6.05, + "learning_rate": 2.263024412626777e-06, + "loss": 0.1393, + "step": 6369, + "task_loss": 0.13585472106933594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798548288209459, + "compression_loss": 0.0, + "distillation_loss": 0.09350334107875824, + "epoch": 6.05, + "learning_rate": 2.2585965614933395e-06, + "loss": 0.089, + "step": 6370, + "task_loss": 0.048799920827150345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985504064656284, + "compression_loss": 0.0, + "distillation_loss": 0.13269993662834167, + "epoch": 6.05, + "learning_rate": 2.254172841445365e-06, + "loss": 0.1258, + "step": 6371, + "task_loss": 0.06413581967353821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985525226602348, + "compression_loss": 0.0, + "distillation_loss": 0.11976936459541321, + "epoch": 6.05, + "learning_rate": 2.249753253286441e-06, + "loss": 0.1226, + "step": 6372, + "task_loss": 0.14833396673202515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985546367942818, + "compression_loss": 0.0, + "distillation_loss": 0.11396154761314392, + "epoch": 6.05, + "learning_rate": 2.2453377978194024e-06, + "loss": 0.1091, + "step": 6373, + "task_loss": 0.06537455320358276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985567488687731, + "compression_loss": 0.0, + "distillation_loss": 0.10704049468040466, + "epoch": 6.05, + "learning_rate": 2.2409264758463363e-06, + "loss": 0.0984, + "step": 6374, + "task_loss": 0.02111482247710228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985588588847126, + "compression_loss": 0.0, + "distillation_loss": 0.1097719818353653, + "epoch": 6.05, + "learning_rate": 2.2365192881685843e-06, + "loss": 0.1024, + "step": 6375, + "task_loss": 0.03654416650533676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985609668431037, + "compression_loss": 0.0, + "distillation_loss": 0.10831979662179947, + "epoch": 6.06, + "learning_rate": 2.232116235586737e-06, + "loss": 0.1068, + "step": 6376, + "task_loss": 0.0928025022149086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985630727449502, + "compression_loss": 0.0, + "distillation_loss": 0.0745440348982811, + "epoch": 6.06, + "learning_rate": 2.2277173189006266e-06, + "loss": 0.0728, + "step": 6377, + "task_loss": 0.05667824670672417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985651765912558, + "compression_loss": 0.0, + "distillation_loss": 0.09890006482601166, + "epoch": 6.06, + "learning_rate": 2.223322538909339e-06, + "loss": 0.0972, + "step": 6378, + "task_loss": 0.0820925384759903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985672783830241, + "compression_loss": 0.0, + "distillation_loss": 0.06193868815898895, + "epoch": 6.06, + "learning_rate": 2.2189318964112054e-06, + "loss": 0.0637, + "step": 6379, + "task_loss": 0.07989262044429779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985693781212589, + "compression_loss": 0.0, + "distillation_loss": 0.09831628203392029, + "epoch": 6.06, + "learning_rate": 2.2145453922038093e-06, + "loss": 0.1028, + "step": 6380, + "task_loss": 0.14268594980239868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985714758069639, + "compression_loss": 0.0, + "distillation_loss": 0.1020318865776062, + "epoch": 6.06, + "learning_rate": 2.210163027083975e-06, + "loss": 0.0982, + "step": 6381, + "task_loss": 0.06365399807691574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985735714411425, + "compression_loss": 0.0, + "distillation_loss": 0.12122917175292969, + "epoch": 6.06, + "learning_rate": 2.205784801847785e-06, + "loss": 0.1198, + "step": 6382, + "task_loss": 0.10688640177249908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985756650247987, + "compression_loss": 0.0, + "distillation_loss": 0.11438720673322678, + "epoch": 6.06, + "learning_rate": 2.2014107172905666e-06, + "loss": 0.1126, + "step": 6383, + "task_loss": 0.0968388095498085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798577756558936, + "compression_loss": 0.0, + "distillation_loss": 0.15790635347366333, + "epoch": 6.06, + "learning_rate": 2.197040774206882e-06, + "loss": 0.1469, + "step": 6384, + "task_loss": 0.0477466806769371 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985798460445582, + "compression_loss": 0.0, + "distillation_loss": 0.16549362242221832, + "epoch": 6.06, + "learning_rate": 2.192674973390568e-06, + "loss": 0.1518, + "step": 6385, + "task_loss": 0.028843212872743607 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985819334826689, + "compression_loss": 0.0, + "distillation_loss": 0.08308826386928558, + "epoch": 6.06, + "learning_rate": 2.188313315634688e-06, + "loss": 0.0802, + "step": 6386, + "task_loss": 0.05457150936126709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985840188742719, + "compression_loss": 0.0, + "distillation_loss": 0.10413433611392975, + "epoch": 6.07, + "learning_rate": 2.1839558017315547e-06, + "loss": 0.0995, + "step": 6387, + "task_loss": 0.057557813823223114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985861022203707, + "compression_loss": 0.0, + "distillation_loss": 0.0583919882774353, + "epoch": 6.07, + "learning_rate": 2.1796024324727297e-06, + "loss": 0.0768, + "step": 6388, + "task_loss": 0.24291850626468658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985881835219691, + "compression_loss": 0.0, + "distillation_loss": 0.07095431536436081, + "epoch": 6.07, + "learning_rate": 2.1752532086490333e-06, + "loss": 0.0648, + "step": 6389, + "task_loss": 0.009517394006252289 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985902627800708, + "compression_loss": 0.0, + "distillation_loss": 0.13259541988372803, + "epoch": 6.07, + "learning_rate": 2.1709081310505143e-06, + "loss": 0.1337, + "step": 6390, + "task_loss": 0.14401474595069885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985923399956794, + "compression_loss": 0.0, + "distillation_loss": 0.06342358887195587, + "epoch": 6.07, + "learning_rate": 2.1665672004664765e-06, + "loss": 0.0617, + "step": 6391, + "task_loss": 0.04634593799710274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985944151697987, + "compression_loss": 0.0, + "distillation_loss": 0.14910653233528137, + "epoch": 6.07, + "learning_rate": 2.1622304176854736e-06, + "loss": 0.1597, + "step": 6392, + "task_loss": 0.25551342964172363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985964883034322, + "compression_loss": 0.0, + "distillation_loss": 0.054450199007987976, + "epoch": 6.07, + "learning_rate": 2.1578977834953053e-06, + "loss": 0.05, + "step": 6393, + "task_loss": 0.010282203555107117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7985985593975837, + "compression_loss": 0.0, + "distillation_loss": 0.18467079102993011, + "epoch": 6.07, + "learning_rate": 2.153569298683017e-06, + "loss": 0.1853, + "step": 6394, + "task_loss": 0.1906275451183319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986006284532569, + "compression_loss": 0.0, + "distillation_loss": 0.09689827263355255, + "epoch": 6.07, + "learning_rate": 2.149244964034888e-06, + "loss": 0.0951, + "step": 6395, + "task_loss": 0.07894426584243774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986026954714555, + "compression_loss": 0.0, + "distillation_loss": 0.043446026742458344, + "epoch": 6.07, + "learning_rate": 2.1449247803364687e-06, + "loss": 0.0397, + "step": 6396, + "task_loss": 0.006183173507452011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798604760453183, + "compression_loss": 0.0, + "distillation_loss": 0.05788339674472809, + "epoch": 6.08, + "learning_rate": 2.140608748372533e-06, + "loss": 0.0535, + "step": 6397, + "task_loss": 0.013864126056432724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986068233994433, + "compression_loss": 0.0, + "distillation_loss": 0.06160301715135574, + "epoch": 6.08, + "learning_rate": 2.136296868927104e-06, + "loss": 0.0612, + "step": 6398, + "task_loss": 0.057745207101106644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79860888431124, + "compression_loss": 0.0, + "distillation_loss": 0.06886729598045349, + "epoch": 6.08, + "learning_rate": 2.1319891427834664e-06, + "loss": 0.0634, + "step": 6399, + "task_loss": 0.014371348544955254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986109431895767, + "compression_loss": 0.0, + "distillation_loss": 0.05757201835513115, + "epoch": 6.08, + "learning_rate": 2.127685570724136e-06, + "loss": 0.0528, + "step": 6400, + "task_loss": 0.009750045835971832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986130000354573, + "compression_loss": 0.0, + "distillation_loss": 0.1767224222421646, + "epoch": 6.08, + "learning_rate": 2.1233861535308706e-06, + "loss": 0.1751, + "step": 6401, + "task_loss": 0.1601455807685852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986150548498853, + "compression_loss": 0.0, + "distillation_loss": 0.08510977774858475, + "epoch": 6.08, + "learning_rate": 2.1190908919846875e-06, + "loss": 0.08, + "step": 6402, + "task_loss": 0.03425342217087746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986171076338643, + "compression_loss": 0.0, + "distillation_loss": 0.10737930238246918, + "epoch": 6.08, + "learning_rate": 2.1147997868658425e-06, + "loss": 0.1043, + "step": 6403, + "task_loss": 0.0761018693447113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986191583883983, + "compression_loss": 0.0, + "distillation_loss": 0.265876829624176, + "epoch": 6.08, + "learning_rate": 2.110512838953832e-06, + "loss": 0.2627, + "step": 6404, + "task_loss": 0.23371505737304688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986212071144907, + "compression_loss": 0.0, + "distillation_loss": 0.0776783674955368, + "epoch": 6.08, + "learning_rate": 2.1062300490273968e-06, + "loss": 0.0801, + "step": 6405, + "task_loss": 0.10176847875118256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986232538131453, + "compression_loss": 0.0, + "distillation_loss": 0.04853527992963791, + "epoch": 6.08, + "learning_rate": 2.1019514178645367e-06, + "loss": 0.0567, + "step": 6406, + "task_loss": 0.13034237921237946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986252984853657, + "compression_loss": 0.0, + "distillation_loss": 0.04432214796543121, + "epoch": 6.08, + "learning_rate": 2.0976769462424774e-06, + "loss": 0.0406, + "step": 6407, + "task_loss": 0.007335290312767029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986273411321556, + "compression_loss": 0.0, + "distillation_loss": 0.05146826058626175, + "epoch": 6.09, + "learning_rate": 2.0934066349376975e-06, + "loss": 0.0476, + "step": 6408, + "task_loss": 0.012544374912977219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986293817545188, + "compression_loss": 0.0, + "distillation_loss": 0.08119907975196838, + "epoch": 6.09, + "learning_rate": 2.0891404847259267e-06, + "loss": 0.0824, + "step": 6409, + "task_loss": 0.09306506812572479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986314203534588, + "compression_loss": 0.0, + "distillation_loss": 0.06213619187474251, + "epoch": 6.09, + "learning_rate": 2.08487849638212e-06, + "loss": 0.0582, + "step": 6410, + "task_loss": 0.023112241178750992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986334569299796, + "compression_loss": 0.0, + "distillation_loss": 0.06045207008719444, + "epoch": 6.09, + "learning_rate": 2.0806206706804998e-06, + "loss": 0.0657, + "step": 6411, + "task_loss": 0.11293409764766693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986354914850844, + "compression_loss": 0.0, + "distillation_loss": 0.06079762801527977, + "epoch": 6.09, + "learning_rate": 2.0763670083945114e-06, + "loss": 0.0659, + "step": 6412, + "task_loss": 0.11149650812149048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986375240197773, + "compression_loss": 0.0, + "distillation_loss": 0.1584654152393341, + "epoch": 6.09, + "learning_rate": 2.0721175102968616e-06, + "loss": 0.147, + "step": 6413, + "task_loss": 0.043657951056957245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986395545350619, + "compression_loss": 0.0, + "distillation_loss": 0.06325273215770721, + "epoch": 6.09, + "learning_rate": 2.067872177159488e-06, + "loss": 0.059, + "step": 6414, + "task_loss": 0.020560430362820625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986415830319417, + "compression_loss": 0.0, + "distillation_loss": 0.050118543207645416, + "epoch": 6.09, + "learning_rate": 2.0636310097535724e-06, + "loss": 0.0472, + "step": 6415, + "task_loss": 0.020842991769313812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986436095114204, + "compression_loss": 0.0, + "distillation_loss": 0.06810344755649567, + "epoch": 6.09, + "learning_rate": 2.0593940088495495e-06, + "loss": 0.0687, + "step": 6416, + "task_loss": 0.07414126396179199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798645633974502, + "compression_loss": 0.0, + "distillation_loss": 0.10694337636232376, + "epoch": 6.09, + "learning_rate": 2.055161175217091e-06, + "loss": 0.1054, + "step": 6417, + "task_loss": 0.09133277088403702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986476564221897, + "compression_loss": 0.0, + "distillation_loss": 0.05698537826538086, + "epoch": 6.09, + "learning_rate": 2.0509325096251004e-06, + "loss": 0.0552, + "step": 6418, + "task_loss": 0.03890334814786911 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986496768554876, + "compression_loss": 0.0, + "distillation_loss": 0.07449530810117722, + "epoch": 6.1, + "learning_rate": 2.046708012841744e-06, + "loss": 0.072, + "step": 6419, + "task_loss": 0.049478065222501755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986516952753993, + "compression_loss": 0.0, + "distillation_loss": 0.07264258712530136, + "epoch": 6.1, + "learning_rate": 2.042487685634428e-06, + "loss": 0.0676, + "step": 6420, + "task_loss": 0.021874142810702324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986537116829282, + "compression_loss": 0.0, + "distillation_loss": 0.09519660472869873, + "epoch": 6.1, + "learning_rate": 2.038271528769786e-06, + "loss": 0.1082, + "step": 6421, + "task_loss": 0.2249455749988556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986557260790783, + "compression_loss": 0.0, + "distillation_loss": 0.03500593453645706, + "epoch": 6.1, + "learning_rate": 2.034059543013703e-06, + "loss": 0.0322, + "step": 6422, + "task_loss": 0.006475191563367844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986577384648531, + "compression_loss": 0.0, + "distillation_loss": 0.0777549147605896, + "epoch": 6.1, + "learning_rate": 2.029851729131313e-06, + "loss": 0.081, + "step": 6423, + "task_loss": 0.10977138578891754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986597488412565, + "compression_loss": 0.0, + "distillation_loss": 0.12887020409107208, + "epoch": 6.1, + "learning_rate": 2.0256480878869834e-06, + "loss": 0.1275, + "step": 6424, + "task_loss": 0.11558264493942261 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986617572092919, + "compression_loss": 0.0, + "distillation_loss": 0.1287572830915451, + "epoch": 6.1, + "learning_rate": 2.0214486200443168e-06, + "loss": 0.1278, + "step": 6425, + "task_loss": 0.11913389712572098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986637635699632, + "compression_loss": 0.0, + "distillation_loss": 0.06908180564641953, + "epoch": 6.1, + "learning_rate": 2.017253326366181e-06, + "loss": 0.065, + "step": 6426, + "task_loss": 0.027965903282165527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986657679242739, + "compression_loss": 0.0, + "distillation_loss": 0.09820552915334702, + "epoch": 6.1, + "learning_rate": 2.0130622076146576e-06, + "loss": 0.0986, + "step": 6427, + "task_loss": 0.10234321653842926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986677702732278, + "compression_loss": 0.0, + "distillation_loss": 0.0638742744922638, + "epoch": 6.1, + "learning_rate": 2.0088752645510995e-06, + "loss": 0.0609, + "step": 6428, + "task_loss": 0.03451484069228172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986697706178286, + "compression_loss": 0.0, + "distillation_loss": 0.18671873211860657, + "epoch": 6.11, + "learning_rate": 2.004692497936067e-06, + "loss": 0.194, + "step": 6429, + "task_loss": 0.259968101978302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986717689590799, + "compression_loss": 0.0, + "distillation_loss": 0.04738321155309677, + "epoch": 6.11, + "learning_rate": 2.0005139085293945e-06, + "loss": 0.0452, + "step": 6430, + "task_loss": 0.025601202622056007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986737652979855, + "compression_loss": 0.0, + "distillation_loss": 0.05693122744560242, + "epoch": 6.11, + "learning_rate": 1.996339497090138e-06, + "loss": 0.0529, + "step": 6431, + "task_loss": 0.01685180887579918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986757596355489, + "compression_loss": 0.0, + "distillation_loss": 0.0986439436674118, + "epoch": 6.11, + "learning_rate": 1.9921692643765947e-06, + "loss": 0.0996, + "step": 6432, + "task_loss": 0.10801742225885391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798677751972774, + "compression_loss": 0.0, + "distillation_loss": 0.11213336884975433, + "epoch": 6.11, + "learning_rate": 1.9880032111463166e-06, + "loss": 0.1301, + "step": 6433, + "task_loss": 0.29207485914230347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986797423106643, + "compression_loss": 0.0, + "distillation_loss": 0.035904861986637115, + "epoch": 6.11, + "learning_rate": 1.9838413381560813e-06, + "loss": 0.0345, + "step": 6434, + "task_loss": 0.021603219211101532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986817306502235, + "compression_loss": 0.0, + "distillation_loss": 0.05209188535809517, + "epoch": 6.11, + "learning_rate": 1.979683646161909e-06, + "loss": 0.0516, + "step": 6435, + "task_loss": 0.04718216508626938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986837169924554, + "compression_loss": 0.0, + "distillation_loss": 0.05253394693136215, + "epoch": 6.11, + "learning_rate": 1.975530135919068e-06, + "loss": 0.0616, + "step": 6436, + "task_loss": 0.14322909712791443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986857013383637, + "compression_loss": 0.0, + "distillation_loss": 0.0527234822511673, + "epoch": 6.11, + "learning_rate": 1.9713808081820716e-06, + "loss": 0.0584, + "step": 6437, + "task_loss": 0.1097893938422203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986876836889519, + "compression_loss": 0.0, + "distillation_loss": 0.036524612456560135, + "epoch": 6.11, + "learning_rate": 1.9672356637046584e-06, + "loss": 0.0335, + "step": 6438, + "task_loss": 0.00587356835603714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986896640452238, + "compression_loss": 0.0, + "distillation_loss": 0.0352536141872406, + "epoch": 6.11, + "learning_rate": 1.9630947032398067e-06, + "loss": 0.0478, + "step": 6439, + "task_loss": 0.16093014180660248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798691642408183, + "compression_loss": 0.0, + "distillation_loss": 0.04243883490562439, + "epoch": 6.12, + "learning_rate": 1.9589579275397562e-06, + "loss": 0.044, + "step": 6440, + "task_loss": 0.058299075812101364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986936187788334, + "compression_loss": 0.0, + "distillation_loss": 0.1163100153207779, + "epoch": 6.12, + "learning_rate": 1.9548253373559646e-06, + "loss": 0.1117, + "step": 6441, + "task_loss": 0.07047397643327713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986955931581784, + "compression_loss": 0.0, + "distillation_loss": 0.0603814423084259, + "epoch": 6.12, + "learning_rate": 1.9506969334391332e-06, + "loss": 0.0554, + "step": 6442, + "task_loss": 0.010571654886007309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986975655472218, + "compression_loss": 0.0, + "distillation_loss": 0.03320575878024101, + "epoch": 6.12, + "learning_rate": 1.9465727165392134e-06, + "loss": 0.0305, + "step": 6443, + "task_loss": 0.006414549425244331 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7986995359469674, + "compression_loss": 0.0, + "distillation_loss": 0.025436367839574814, + "epoch": 6.12, + "learning_rate": 1.942452687405383e-06, + "loss": 0.0307, + "step": 6444, + "task_loss": 0.07796012610197067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987015043584187, + "compression_loss": 0.0, + "distillation_loss": 0.04995856434106827, + "epoch": 6.12, + "learning_rate": 1.9383368467860734e-06, + "loss": 0.0519, + "step": 6445, + "task_loss": 0.06982216984033585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987034707825795, + "compression_loss": 0.0, + "distillation_loss": 0.028508171439170837, + "epoch": 6.12, + "learning_rate": 1.9342251954289346e-06, + "loss": 0.0264, + "step": 6446, + "task_loss": 0.00761030986905098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987054352204533, + "compression_loss": 0.0, + "distillation_loss": 0.06900826841592789, + "epoch": 6.12, + "learning_rate": 1.930117734080883e-06, + "loss": 0.0651, + "step": 6447, + "task_loss": 0.029789365828037262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987073976730441, + "compression_loss": 0.0, + "distillation_loss": 0.07841695845127106, + "epoch": 6.12, + "learning_rate": 1.926014463488049e-06, + "loss": 0.0737, + "step": 6448, + "task_loss": 0.031337086111307144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987093581413554, + "compression_loss": 0.0, + "distillation_loss": 0.03884424641728401, + "epoch": 6.12, + "learning_rate": 1.921915384395809e-06, + "loss": 0.0368, + "step": 6449, + "task_loss": 0.018084675073623657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987113166263907, + "compression_loss": 0.0, + "distillation_loss": 0.03938658908009529, + "epoch": 6.13, + "learning_rate": 1.917820497548789e-06, + "loss": 0.048, + "step": 6450, + "task_loss": 0.12579363584518433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987132731291541, + "compression_loss": 0.0, + "distillation_loss": 0.0376831591129303, + "epoch": 6.13, + "learning_rate": 1.913729803690839e-06, + "loss": 0.0408, + "step": 6451, + "task_loss": 0.06835294514894485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987152276506488, + "compression_loss": 0.0, + "distillation_loss": 0.07034672051668167, + "epoch": 6.13, + "learning_rate": 1.9096433035650565e-06, + "loss": 0.0667, + "step": 6452, + "task_loss": 0.03402462229132652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987171801918789, + "compression_loss": 0.0, + "distillation_loss": 0.06073524430394173, + "epoch": 6.13, + "learning_rate": 1.9055609979137634e-06, + "loss": 0.0568, + "step": 6453, + "task_loss": 0.020950062200427055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987191307538479, + "compression_loss": 0.0, + "distillation_loss": 0.08777901530265808, + "epoch": 6.13, + "learning_rate": 1.9014828874785478e-06, + "loss": 0.0853, + "step": 6454, + "task_loss": 0.0634586289525032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987210793375594, + "compression_loss": 0.0, + "distillation_loss": 0.054110340774059296, + "epoch": 6.13, + "learning_rate": 1.8974089730002087e-06, + "loss": 0.0514, + "step": 6455, + "task_loss": 0.026968229562044144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987230259440173, + "compression_loss": 0.0, + "distillation_loss": 0.03255057334899902, + "epoch": 6.13, + "learning_rate": 1.89333925521879e-06, + "loss": 0.0306, + "step": 6456, + "task_loss": 0.012770043686032295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987249705742251, + "compression_loss": 0.0, + "distillation_loss": 0.05460744723677635, + "epoch": 6.13, + "learning_rate": 1.8892737348735812e-06, + "loss": 0.0586, + "step": 6457, + "task_loss": 0.09443493187427521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987269132291865, + "compression_loss": 0.0, + "distillation_loss": 0.10842868685722351, + "epoch": 6.13, + "learning_rate": 1.8852124127031022e-06, + "loss": 0.1172, + "step": 6458, + "task_loss": 0.19569729268550873 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987288539099053, + "compression_loss": 0.0, + "distillation_loss": 0.04304853081703186, + "epoch": 6.13, + "learning_rate": 1.8811552894451107e-06, + "loss": 0.042, + "step": 6459, + "task_loss": 0.03260982409119606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987307926173851, + "compression_loss": 0.0, + "distillation_loss": 0.07537424564361572, + "epoch": 6.13, + "learning_rate": 1.877102365836597e-06, + "loss": 0.0735, + "step": 6460, + "task_loss": 0.056225549429655075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987327293526295, + "compression_loss": 0.0, + "distillation_loss": 0.02507023513317108, + "epoch": 6.14, + "learning_rate": 1.8730536426138034e-06, + "loss": 0.023, + "step": 6461, + "task_loss": 0.004859650507569313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987346641166423, + "compression_loss": 0.0, + "distillation_loss": 0.06949448585510254, + "epoch": 6.14, + "learning_rate": 1.8690091205121941e-06, + "loss": 0.0698, + "step": 6462, + "task_loss": 0.07217264175415039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987365969104272, + "compression_loss": 0.0, + "distillation_loss": 0.04962468147277832, + "epoch": 6.14, + "learning_rate": 1.8649688002664756e-06, + "loss": 0.048, + "step": 6463, + "task_loss": 0.033658094704151154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987385277349879, + "compression_loss": 0.0, + "distillation_loss": 0.02775590494275093, + "epoch": 6.14, + "learning_rate": 1.8609326826106e-06, + "loss": 0.0255, + "step": 6464, + "task_loss": 0.00518425740301609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987404565913279, + "compression_loss": 0.0, + "distillation_loss": 0.029499584808945656, + "epoch": 6.14, + "learning_rate": 1.8569007682777417e-06, + "loss": 0.0272, + "step": 6465, + "task_loss": 0.006057474762201309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798742383480451, + "compression_loss": 0.0, + "distillation_loss": 0.04246009886264801, + "epoch": 6.14, + "learning_rate": 1.8528730580003178e-06, + "loss": 0.0466, + "step": 6466, + "task_loss": 0.08337102830410004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987443084033609, + "compression_loss": 0.0, + "distillation_loss": 0.08518815040588379, + "epoch": 6.14, + "learning_rate": 1.8488495525099735e-06, + "loss": 0.0868, + "step": 6467, + "task_loss": 0.10114063322544098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987462313610613, + "compression_loss": 0.0, + "distillation_loss": 0.04680711403489113, + "epoch": 6.14, + "learning_rate": 1.8448302525376132e-06, + "loss": 0.0431, + "step": 6468, + "task_loss": 0.009998075664043427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987481523545558, + "compression_loss": 0.0, + "distillation_loss": 0.07749959826469421, + "epoch": 6.14, + "learning_rate": 1.8408151588133498e-06, + "loss": 0.0829, + "step": 6469, + "task_loss": 0.13170292973518372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987500713848481, + "compression_loss": 0.0, + "distillation_loss": 0.10043874382972717, + "epoch": 6.14, + "learning_rate": 1.8368042720665446e-06, + "loss": 0.1057, + "step": 6470, + "task_loss": 0.15264543890953064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987519884529419, + "compression_loss": 0.0, + "distillation_loss": 0.10912171006202698, + "epoch": 6.15, + "learning_rate": 1.8327975930258035e-06, + "loss": 0.1048, + "step": 6471, + "task_loss": 0.0659012421965599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798753903559841, + "compression_loss": 0.0, + "distillation_loss": 0.01361011527478695, + "epoch": 6.15, + "learning_rate": 1.8287951224189553e-06, + "loss": 0.0198, + "step": 6472, + "task_loss": 0.07584784179925919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987558167065488, + "compression_loss": 0.0, + "distillation_loss": 0.14504070580005646, + "epoch": 6.15, + "learning_rate": 1.8247968609730686e-06, + "loss": 0.1404, + "step": 6473, + "task_loss": 0.09844270348548889 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987577278940693, + "compression_loss": 0.0, + "distillation_loss": 0.07923628389835358, + "epoch": 6.15, + "learning_rate": 1.8208028094144375e-06, + "loss": 0.0724, + "step": 6474, + "task_loss": 0.011291364207863808 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798759637123406, + "compression_loss": 0.0, + "distillation_loss": 0.023012623190879822, + "epoch": 6.15, + "learning_rate": 1.8168129684686148e-06, + "loss": 0.0308, + "step": 6475, + "task_loss": 0.10104191303253174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987615443955627, + "compression_loss": 0.0, + "distillation_loss": 0.037004388868808746, + "epoch": 6.15, + "learning_rate": 1.8128273388603679e-06, + "loss": 0.0342, + "step": 6476, + "task_loss": 0.008559944108128548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987634497115429, + "compression_loss": 0.0, + "distillation_loss": 0.02188132330775261, + "epoch": 6.15, + "learning_rate": 1.808845921313701e-06, + "loss": 0.0206, + "step": 6477, + "task_loss": 0.008831024169921875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987653530723503, + "compression_loss": 0.0, + "distillation_loss": 0.02209128439426422, + "epoch": 6.15, + "learning_rate": 1.8048687165518662e-06, + "loss": 0.0204, + "step": 6478, + "task_loss": 0.004884377121925354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987672544789888, + "compression_loss": 0.0, + "distillation_loss": 0.026295151561498642, + "epoch": 6.15, + "learning_rate": 1.800895725297333e-06, + "loss": 0.0385, + "step": 6479, + "task_loss": 0.14823564887046814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798769153932462, + "compression_loss": 0.0, + "distillation_loss": 0.02808169648051262, + "epoch": 6.15, + "learning_rate": 1.7969269482718265e-06, + "loss": 0.0283, + "step": 6480, + "task_loss": 0.0304415300488472 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987710514337734, + "compression_loss": 0.0, + "distillation_loss": 0.09501353651285172, + "epoch": 6.15, + "learning_rate": 1.7929623861962785e-06, + "loss": 0.1057, + "step": 6481, + "task_loss": 0.2016463577747345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987729469839269, + "compression_loss": 0.0, + "distillation_loss": 0.05659741908311844, + "epoch": 6.16, + "learning_rate": 1.789002039790888e-06, + "loss": 0.0551, + "step": 6482, + "task_loss": 0.041581884026527405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798774840583926, + "compression_loss": 0.0, + "distillation_loss": 0.02717326581478119, + "epoch": 6.16, + "learning_rate": 1.78504590977506e-06, + "loss": 0.0309, + "step": 6483, + "task_loss": 0.06475157290697098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987767322347745, + "compression_loss": 0.0, + "distillation_loss": 0.10455787181854248, + "epoch": 6.16, + "learning_rate": 1.7810939968674418e-06, + "loss": 0.1024, + "step": 6484, + "task_loss": 0.08274058252573013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987786219374761, + "compression_loss": 0.0, + "distillation_loss": 0.06307333707809448, + "epoch": 6.16, + "learning_rate": 1.7771463017859287e-06, + "loss": 0.0622, + "step": 6485, + "task_loss": 0.05453290790319443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987805096930344, + "compression_loss": 0.0, + "distillation_loss": 0.05282985419034958, + "epoch": 6.16, + "learning_rate": 1.77320282524763e-06, + "loss": 0.0512, + "step": 6486, + "task_loss": 0.03684225305914879 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987823955024531, + "compression_loss": 0.0, + "distillation_loss": 0.04597539082169533, + "epoch": 6.16, + "learning_rate": 1.7692635679688986e-06, + "loss": 0.0523, + "step": 6487, + "task_loss": 0.10876553505659103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987842793667359, + "compression_loss": 0.0, + "distillation_loss": 0.08502782136201859, + "epoch": 6.16, + "learning_rate": 1.7653285306653194e-06, + "loss": 0.0824, + "step": 6488, + "task_loss": 0.05903245136141777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987861612868866, + "compression_loss": 0.0, + "distillation_loss": 0.16358347237110138, + "epoch": 6.16, + "learning_rate": 1.7613977140517158e-06, + "loss": 0.1704, + "step": 6489, + "task_loss": 0.23221097886562347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987880412639087, + "compression_loss": 0.0, + "distillation_loss": 0.05940496176481247, + "epoch": 6.16, + "learning_rate": 1.7574711188421356e-06, + "loss": 0.0593, + "step": 6490, + "task_loss": 0.05786725506186485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987899192988059, + "compression_loss": 0.0, + "distillation_loss": 0.05588078126311302, + "epoch": 6.16, + "learning_rate": 1.7535487457498583e-06, + "loss": 0.0596, + "step": 6491, + "task_loss": 0.09317133575677872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798791795392582, + "compression_loss": 0.0, + "distillation_loss": 0.0370253287255764, + "epoch": 6.17, + "learning_rate": 1.7496305954874142e-06, + "loss": 0.0443, + "step": 6492, + "task_loss": 0.10931254923343658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987936695462405, + "compression_loss": 0.0, + "distillation_loss": 0.03799361735582352, + "epoch": 6.17, + "learning_rate": 1.7457166687665449e-06, + "loss": 0.045, + "step": 6493, + "task_loss": 0.1080305278301239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987955417607854, + "compression_loss": 0.0, + "distillation_loss": 0.0513494536280632, + "epoch": 6.17, + "learning_rate": 1.7418069662982344e-06, + "loss": 0.0493, + "step": 6494, + "task_loss": 0.03114178031682968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987974120372201, + "compression_loss": 0.0, + "distillation_loss": 0.0808219462633133, + "epoch": 6.17, + "learning_rate": 1.7379014887927064e-06, + "loss": 0.0772, + "step": 6495, + "task_loss": 0.044266168028116226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7987992803765482, + "compression_loss": 0.0, + "distillation_loss": 0.19209139049053192, + "epoch": 6.17, + "learning_rate": 1.734000236959399e-06, + "loss": 0.1894, + "step": 6496, + "task_loss": 0.16527491807937622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988011467797738, + "compression_loss": 0.0, + "distillation_loss": 0.033167578279972076, + "epoch": 6.17, + "learning_rate": 1.7301032115070003e-06, + "loss": 0.0421, + "step": 6497, + "task_loss": 0.12249952554702759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988030112479001, + "compression_loss": 0.0, + "distillation_loss": 0.061443835496902466, + "epoch": 6.17, + "learning_rate": 1.7262104131434226e-06, + "loss": 0.0669, + "step": 6498, + "task_loss": 0.11603046953678131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988048737819311, + "compression_loss": 0.0, + "distillation_loss": 0.020476579666137695, + "epoch": 6.17, + "learning_rate": 1.722321842575811e-06, + "loss": 0.0188, + "step": 6499, + "task_loss": 0.0034268908202648163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988067343828704, + "compression_loss": 0.0, + "distillation_loss": 0.0780348926782608, + "epoch": 6.17, + "learning_rate": 1.7184375005105474e-06, + "loss": 0.0799, + "step": 6500, + "task_loss": 0.09668619930744171 + }, + { + "epoch": 6.17, + "eval_accuracy": 0.8876146788990825, + "eval_loss": 0.4523507356643677, + "eval_runtime": 18.245, + "eval_samples_per_second": 47.794, + "eval_steps_per_second": 5.974, + "step": 6500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988085930517217, + "compression_loss": 0.0, + "distillation_loss": 0.019627831876277924, + "epoch": 6.17, + "learning_rate": 1.714557387653229e-06, + "loss": 0.0234, + "step": 6501, + "task_loss": 0.0573212131857872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988104497894886, + "compression_loss": 0.0, + "distillation_loss": 0.04171549528837204, + "epoch": 6.17, + "learning_rate": 1.710681504708711e-06, + "loss": 0.0404, + "step": 6502, + "task_loss": 0.028218252584338188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988123045971749, + "compression_loss": 0.0, + "distillation_loss": 0.03882744908332825, + "epoch": 6.18, + "learning_rate": 1.7068098523810611e-06, + "loss": 0.0428, + "step": 6503, + "task_loss": 0.07813085615634918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988141574757841, + "compression_loss": 0.0, + "distillation_loss": 0.028121117502450943, + "epoch": 6.18, + "learning_rate": 1.7029424313735776e-06, + "loss": 0.0338, + "step": 6504, + "task_loss": 0.08514552563428879 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988160084263201, + "compression_loss": 0.0, + "distillation_loss": 0.0896795317530632, + "epoch": 6.18, + "learning_rate": 1.6990792423888013e-06, + "loss": 0.1013, + "step": 6505, + "task_loss": 0.20579922199249268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988178574497864, + "compression_loss": 0.0, + "distillation_loss": 0.09015851467847824, + "epoch": 6.18, + "learning_rate": 1.6952202861285044e-06, + "loss": 0.0904, + "step": 6506, + "task_loss": 0.09299979358911514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988197045471869, + "compression_loss": 0.0, + "distillation_loss": 0.21845272183418274, + "epoch": 6.18, + "learning_rate": 1.6913655632936787e-06, + "loss": 0.2116, + "step": 6507, + "task_loss": 0.1496710181236267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798821549719525, + "compression_loss": 0.0, + "distillation_loss": 0.05457516014575958, + "epoch": 6.18, + "learning_rate": 1.6875150745845503e-06, + "loss": 0.0568, + "step": 6508, + "task_loss": 0.07647211849689484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988233929678046, + "compression_loss": 0.0, + "distillation_loss": 0.059655383229255676, + "epoch": 6.18, + "learning_rate": 1.6836688207005846e-06, + "loss": 0.0653, + "step": 6509, + "task_loss": 0.11646562069654465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988252342930292, + "compression_loss": 0.0, + "distillation_loss": 0.06423240900039673, + "epoch": 6.18, + "learning_rate": 1.6798268023404727e-06, + "loss": 0.0602, + "step": 6510, + "task_loss": 0.023659339174628258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988270736962028, + "compression_loss": 0.0, + "distillation_loss": 0.06197018176317215, + "epoch": 6.18, + "learning_rate": 1.6759890202021289e-06, + "loss": 0.0629, + "step": 6511, + "task_loss": 0.07137607038021088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988289111783287, + "compression_loss": 0.0, + "distillation_loss": 0.026029150933027267, + "epoch": 6.18, + "learning_rate": 1.6721554749827116e-06, + "loss": 0.0309, + "step": 6512, + "task_loss": 0.07451260089874268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988307467404108, + "compression_loss": 0.0, + "distillation_loss": 0.04224439710378647, + "epoch": 6.19, + "learning_rate": 1.6683261673786033e-06, + "loss": 0.052, + "step": 6513, + "task_loss": 0.14017590880393982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988325803834527, + "compression_loss": 0.0, + "distillation_loss": 0.06829048693180084, + "epoch": 6.19, + "learning_rate": 1.664501098085408e-06, + "loss": 0.0639, + "step": 6514, + "task_loss": 0.024488359689712524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988344121084583, + "compression_loss": 0.0, + "distillation_loss": 0.08032073080539703, + "epoch": 6.19, + "learning_rate": 1.6606802677979732e-06, + "loss": 0.0757, + "step": 6515, + "task_loss": 0.03441820666193962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988362419164309, + "compression_loss": 0.0, + "distillation_loss": 0.025070223957300186, + "epoch": 6.19, + "learning_rate": 1.656863677210374e-06, + "loss": 0.0523, + "step": 6516, + "task_loss": 0.2975619435310364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988380698083745, + "compression_loss": 0.0, + "distillation_loss": 0.035705260932445526, + "epoch": 6.19, + "learning_rate": 1.6530513270159116e-06, + "loss": 0.0358, + "step": 6517, + "task_loss": 0.03673187643289566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988398957852926, + "compression_loss": 0.0, + "distillation_loss": 0.09474039822816849, + "epoch": 6.19, + "learning_rate": 1.6492432179071094e-06, + "loss": 0.0979, + "step": 6518, + "task_loss": 0.12658804655075073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798841719848189, + "compression_loss": 0.0, + "distillation_loss": 0.057927001267671585, + "epoch": 6.19, + "learning_rate": 1.645439350575742e-06, + "loss": 0.0682, + "step": 6519, + "task_loss": 0.16106078028678894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988435419980673, + "compression_loss": 0.0, + "distillation_loss": 0.05671095848083496, + "epoch": 6.19, + "learning_rate": 1.6416397257127902e-06, + "loss": 0.0594, + "step": 6520, + "task_loss": 0.08323853462934494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988453622359312, + "compression_loss": 0.0, + "distillation_loss": 0.06431238353252411, + "epoch": 6.19, + "learning_rate": 1.637844344008474e-06, + "loss": 0.0647, + "step": 6521, + "task_loss": 0.06814312934875488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988471805627844, + "compression_loss": 0.0, + "distillation_loss": 0.03375518321990967, + "epoch": 6.19, + "learning_rate": 1.6340532061522474e-06, + "loss": 0.0327, + "step": 6522, + "task_loss": 0.022706888616085052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988489969796306, + "compression_loss": 0.0, + "distillation_loss": 0.052014727145433426, + "epoch": 6.19, + "learning_rate": 1.6302663128327927e-06, + "loss": 0.0496, + "step": 6523, + "task_loss": 0.028193766251206398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988508114874735, + "compression_loss": 0.0, + "distillation_loss": 0.10802538692951202, + "epoch": 6.2, + "learning_rate": 1.62648366473801e-06, + "loss": 0.104, + "step": 6524, + "task_loss": 0.06763241440057755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988526240873166, + "compression_loss": 0.0, + "distillation_loss": 0.05575351417064667, + "epoch": 6.2, + "learning_rate": 1.6227052625550327e-06, + "loss": 0.0649, + "step": 6525, + "task_loss": 0.14689795672893524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988544347801638, + "compression_loss": 0.0, + "distillation_loss": 0.08440142869949341, + "epoch": 6.2, + "learning_rate": 1.6189311069702367e-06, + "loss": 0.0887, + "step": 6526, + "task_loss": 0.12698373198509216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988562435670188, + "compression_loss": 0.0, + "distillation_loss": 0.04269465059041977, + "epoch": 6.2, + "learning_rate": 1.61516119866921e-06, + "loss": 0.047, + "step": 6527, + "task_loss": 0.08578742295503616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798858050448885, + "compression_loss": 0.0, + "distillation_loss": 0.024140551686286926, + "epoch": 6.2, + "learning_rate": 1.6113955383367685e-06, + "loss": 0.024, + "step": 6528, + "task_loss": 0.022829843685030937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988598554267664, + "compression_loss": 0.0, + "distillation_loss": 0.033472198992967606, + "epoch": 6.2, + "learning_rate": 1.6076341266569734e-06, + "loss": 0.0353, + "step": 6529, + "task_loss": 0.05162709578871727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988616585016666, + "compression_loss": 0.0, + "distillation_loss": 0.04639057815074921, + "epoch": 6.2, + "learning_rate": 1.6038769643130973e-06, + "loss": 0.0581, + "step": 6530, + "task_loss": 0.1631542444229126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988634596745892, + "compression_loss": 0.0, + "distillation_loss": 0.041402582079172134, + "epoch": 6.2, + "learning_rate": 1.600124051987645e-06, + "loss": 0.0542, + "step": 6531, + "task_loss": 0.16910916566848755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988652589465378, + "compression_loss": 0.0, + "distillation_loss": 0.045691799372434616, + "epoch": 6.2, + "learning_rate": 1.5963753903623535e-06, + "loss": 0.0528, + "step": 6532, + "task_loss": 0.11651049554347992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988670563185163, + "compression_loss": 0.0, + "distillation_loss": 0.03516625612974167, + "epoch": 6.2, + "learning_rate": 1.592630980118187e-06, + "loss": 0.034, + "step": 6533, + "task_loss": 0.02300405688583851 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988688517915282, + "compression_loss": 0.0, + "distillation_loss": 0.039826877415180206, + "epoch": 6.21, + "learning_rate": 1.5888908219353349e-06, + "loss": 0.039, + "step": 6534, + "task_loss": 0.03174047917127609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988706453665774, + "compression_loss": 0.0, + "distillation_loss": 0.036477230489254, + "epoch": 6.21, + "learning_rate": 1.5851549164932116e-06, + "loss": 0.034, + "step": 6535, + "task_loss": 0.011402864009141922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988724370446674, + "compression_loss": 0.0, + "distillation_loss": 0.05315593630075455, + "epoch": 6.21, + "learning_rate": 1.5814232644704691e-06, + "loss": 0.0655, + "step": 6536, + "task_loss": 0.17702096700668335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988742268268018, + "compression_loss": 0.0, + "distillation_loss": 0.1766928732395172, + "epoch": 6.21, + "learning_rate": 1.5776958665449731e-06, + "loss": 0.1711, + "step": 6537, + "task_loss": 0.12121965736150742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988760147139845, + "compression_loss": 0.0, + "distillation_loss": 0.04658925160765648, + "epoch": 6.21, + "learning_rate": 1.5739727233938239e-06, + "loss": 0.0449, + "step": 6538, + "task_loss": 0.029311183840036392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988778007072191, + "compression_loss": 0.0, + "distillation_loss": 0.038374051451683044, + "epoch": 6.21, + "learning_rate": 1.5702538356933555e-06, + "loss": 0.0453, + "step": 6539, + "task_loss": 0.1074916422367096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988795848075092, + "compression_loss": 0.0, + "distillation_loss": 0.026442725211381912, + "epoch": 6.21, + "learning_rate": 1.5665392041191107e-06, + "loss": 0.0276, + "step": 6540, + "task_loss": 0.03780139237642288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988813670158587, + "compression_loss": 0.0, + "distillation_loss": 0.025853261351585388, + "epoch": 6.21, + "learning_rate": 1.5628288293458804e-06, + "loss": 0.0292, + "step": 6541, + "task_loss": 0.058958619832992554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798883147333271, + "compression_loss": 0.0, + "distillation_loss": 0.06454972922801971, + "epoch": 6.21, + "learning_rate": 1.5591227120476643e-06, + "loss": 0.0724, + "step": 6542, + "task_loss": 0.1430365890264511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79888492576075, + "compression_loss": 0.0, + "distillation_loss": 0.025234917178750038, + "epoch": 6.21, + "learning_rate": 1.5554208528977044e-06, + "loss": 0.0235, + "step": 6543, + "task_loss": 0.007690908387303352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988867022992993, + "compression_loss": 0.0, + "distillation_loss": 0.06024536117911339, + "epoch": 6.21, + "learning_rate": 1.5517232525684571e-06, + "loss": 0.0693, + "step": 6544, + "task_loss": 0.15101151168346405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988884769499225, + "compression_loss": 0.0, + "distillation_loss": 0.04414193704724312, + "epoch": 6.22, + "learning_rate": 1.548029911731605e-06, + "loss": 0.0416, + "step": 6545, + "task_loss": 0.019007055088877678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988902497136234, + "compression_loss": 0.0, + "distillation_loss": 0.028746753931045532, + "epoch": 6.22, + "learning_rate": 1.5443408310580692e-06, + "loss": 0.0323, + "step": 6546, + "task_loss": 0.06452102959156036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988920205914057, + "compression_loss": 0.0, + "distillation_loss": 0.02359924092888832, + "epoch": 6.22, + "learning_rate": 1.5406560112179864e-06, + "loss": 0.0291, + "step": 6547, + "task_loss": 0.07876047492027283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988937895842729, + "compression_loss": 0.0, + "distillation_loss": 0.020535187795758247, + "epoch": 6.22, + "learning_rate": 1.5369754528807152e-06, + "loss": 0.019, + "step": 6548, + "task_loss": 0.005380989983677864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798895556693229, + "compression_loss": 0.0, + "distillation_loss": 0.04782063886523247, + "epoch": 6.22, + "learning_rate": 1.5332991567148515e-06, + "loss": 0.058, + "step": 6549, + "task_loss": 0.1497877985239029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988973219192774, + "compression_loss": 0.0, + "distillation_loss": 0.024879930540919304, + "epoch": 6.22, + "learning_rate": 1.5296271233882165e-06, + "loss": 0.0321, + "step": 6550, + "task_loss": 0.09727062284946442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7988990852634219, + "compression_loss": 0.0, + "distillation_loss": 0.040715500712394714, + "epoch": 6.22, + "learning_rate": 1.5259593535678491e-06, + "loss": 0.0399, + "step": 6551, + "task_loss": 0.03258955478668213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989008467266661, + "compression_loss": 0.0, + "distillation_loss": 0.03168099373579025, + "epoch": 6.22, + "learning_rate": 1.522295847920019e-06, + "loss": 0.0492, + "step": 6552, + "task_loss": 0.20730939507484436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989026063100138, + "compression_loss": 0.0, + "distillation_loss": 0.08038985729217529, + "epoch": 6.22, + "learning_rate": 1.5186366071102133e-06, + "loss": 0.0947, + "step": 6553, + "task_loss": 0.22315886616706848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989043640144685, + "compression_loss": 0.0, + "distillation_loss": 0.022372357547283173, + "epoch": 6.22, + "learning_rate": 1.5149816318031584e-06, + "loss": 0.0208, + "step": 6554, + "task_loss": 0.00664399191737175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989061198410341, + "compression_loss": 0.0, + "distillation_loss": 0.06297887861728668, + "epoch": 6.23, + "learning_rate": 1.5113309226627986e-06, + "loss": 0.0727, + "step": 6555, + "task_loss": 0.16039499640464783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989078737907143, + "compression_loss": 0.0, + "distillation_loss": 0.06179758906364441, + "epoch": 6.23, + "learning_rate": 1.5076844803522922e-06, + "loss": 0.0664, + "step": 6556, + "task_loss": 0.10753493010997772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989096258645125, + "compression_loss": 0.0, + "distillation_loss": 0.025135168805718422, + "epoch": 6.23, + "learning_rate": 1.5040423055340396e-06, + "loss": 0.0317, + "step": 6557, + "task_loss": 0.09094193577766418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989113760634325, + "compression_loss": 0.0, + "distillation_loss": 0.045239001512527466, + "epoch": 6.23, + "learning_rate": 1.5004043988696647e-06, + "loss": 0.0541, + "step": 6558, + "task_loss": 0.13419125974178314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989131243884782, + "compression_loss": 0.0, + "distillation_loss": 0.0402006097137928, + "epoch": 6.23, + "learning_rate": 1.4967707610200083e-06, + "loss": 0.0407, + "step": 6559, + "task_loss": 0.0453285276889801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798914870840653, + "compression_loss": 0.0, + "distillation_loss": 0.043260738253593445, + "epoch": 6.23, + "learning_rate": 1.493141392645131e-06, + "loss": 0.0406, + "step": 6560, + "task_loss": 0.01651701144874096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989166154209608, + "compression_loss": 0.0, + "distillation_loss": 0.1227022111415863, + "epoch": 6.23, + "learning_rate": 1.4895162944043334e-06, + "loss": 0.1221, + "step": 6561, + "task_loss": 0.11656280606985092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989183581304051, + "compression_loss": 0.0, + "distillation_loss": 0.01965763419866562, + "epoch": 6.23, + "learning_rate": 1.4858954669561275e-06, + "loss": 0.0259, + "step": 6562, + "task_loss": 0.08250312507152557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989200989699897, + "compression_loss": 0.0, + "distillation_loss": 0.03354266285896301, + "epoch": 6.23, + "learning_rate": 1.4822789109582513e-06, + "loss": 0.031, + "step": 6563, + "task_loss": 0.00812080129981041 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989218379407182, + "compression_loss": 0.0, + "distillation_loss": 0.10104814171791077, + "epoch": 6.23, + "learning_rate": 1.478666627067679e-06, + "loss": 0.0967, + "step": 6564, + "task_loss": 0.05804811045527458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989235750435945, + "compression_loss": 0.0, + "distillation_loss": 0.07657065242528915, + "epoch": 6.23, + "learning_rate": 1.4750586159405915e-06, + "loss": 0.0822, + "step": 6565, + "task_loss": 0.13330647349357605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989253102796219, + "compression_loss": 0.0, + "distillation_loss": 0.018394935876131058, + "epoch": 6.24, + "learning_rate": 1.4714548782324034e-06, + "loss": 0.0265, + "step": 6566, + "task_loss": 0.09906549006700516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989270436498043, + "compression_loss": 0.0, + "distillation_loss": 0.051937442272901535, + "epoch": 6.24, + "learning_rate": 1.467855414597749e-06, + "loss": 0.0475, + "step": 6567, + "task_loss": 0.007465232163667679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989287751551454, + "compression_loss": 0.0, + "distillation_loss": 0.049969661980867386, + "epoch": 6.24, + "learning_rate": 1.4642602256904946e-06, + "loss": 0.0474, + "step": 6568, + "task_loss": 0.023928040638566017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798930504796649, + "compression_loss": 0.0, + "distillation_loss": 0.03371240198612213, + "epoch": 6.24, + "learning_rate": 1.4606693121637206e-06, + "loss": 0.0316, + "step": 6569, + "task_loss": 0.012854812666773796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989322325753184, + "compression_loss": 0.0, + "distillation_loss": 0.2045125961303711, + "epoch": 6.24, + "learning_rate": 1.457082674669727e-06, + "loss": 0.2015, + "step": 6570, + "task_loss": 0.17422333359718323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989339584921576, + "compression_loss": 0.0, + "distillation_loss": 0.02525508962571621, + "epoch": 6.24, + "learning_rate": 1.4535003138600566e-06, + "loss": 0.0305, + "step": 6571, + "task_loss": 0.07814519852399826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989356825481703, + "compression_loss": 0.0, + "distillation_loss": 0.07085863500833511, + "epoch": 6.24, + "learning_rate": 1.4499222303854532e-06, + "loss": 0.0728, + "step": 6572, + "task_loss": 0.09030604362487793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989374047443599, + "compression_loss": 0.0, + "distillation_loss": 0.09062013030052185, + "epoch": 6.24, + "learning_rate": 1.4463484248958908e-06, + "loss": 0.0856, + "step": 6573, + "task_loss": 0.04006872698664665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989391250817305, + "compression_loss": 0.0, + "distillation_loss": 0.0634981244802475, + "epoch": 6.24, + "learning_rate": 1.4427788980405728e-06, + "loss": 0.0603, + "step": 6574, + "task_loss": 0.031479597091674805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989408435612854, + "compression_loss": 0.0, + "distillation_loss": 0.0360356867313385, + "epoch": 6.24, + "learning_rate": 1.4392136504679244e-06, + "loss": 0.0351, + "step": 6575, + "task_loss": 0.026443321257829666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989425601840284, + "compression_loss": 0.0, + "distillation_loss": 0.0444665402173996, + "epoch": 6.25, + "learning_rate": 1.4356526828255862e-06, + "loss": 0.0505, + "step": 6576, + "task_loss": 0.10513627529144287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989442749509632, + "compression_loss": 0.0, + "distillation_loss": 0.026376407593488693, + "epoch": 6.25, + "learning_rate": 1.432095995760424e-06, + "loss": 0.034, + "step": 6577, + "task_loss": 0.10233157873153687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989459878630936, + "compression_loss": 0.0, + "distillation_loss": 0.04820052161812782, + "epoch": 6.25, + "learning_rate": 1.4285435899185295e-06, + "loss": 0.0516, + "step": 6578, + "task_loss": 0.08200520277023315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989476989214231, + "compression_loss": 0.0, + "distillation_loss": 0.06717506051063538, + "epoch": 6.25, + "learning_rate": 1.424995465945214e-06, + "loss": 0.0792, + "step": 6579, + "task_loss": 0.1877632588148117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989494081269555, + "compression_loss": 0.0, + "distillation_loss": 0.05002850666642189, + "epoch": 6.25, + "learning_rate": 1.4214516244850068e-06, + "loss": 0.0534, + "step": 6580, + "task_loss": 0.08330188691616058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989511154806944, + "compression_loss": 0.0, + "distillation_loss": 0.055970218032598495, + "epoch": 6.25, + "learning_rate": 1.417912066181673e-06, + "loss": 0.0618, + "step": 6581, + "task_loss": 0.11391516029834747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989528209836435, + "compression_loss": 0.0, + "distillation_loss": 0.03707553446292877, + "epoch": 6.25, + "learning_rate": 1.414376791678182e-06, + "loss": 0.045, + "step": 6582, + "task_loss": 0.11639431864023209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989545246368066, + "compression_loss": 0.0, + "distillation_loss": 0.09972971677780151, + "epoch": 6.25, + "learning_rate": 1.4108458016167337e-06, + "loss": 0.1031, + "step": 6583, + "task_loss": 0.13307958841323853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989562264411872, + "compression_loss": 0.0, + "distillation_loss": 0.15906114876270294, + "epoch": 6.25, + "learning_rate": 1.407319096638754e-06, + "loss": 0.16, + "step": 6584, + "task_loss": 0.16835139691829681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989579263977891, + "compression_loss": 0.0, + "distillation_loss": 0.24002781510353088, + "epoch": 6.25, + "learning_rate": 1.403796677384886e-06, + "loss": 0.2357, + "step": 6585, + "task_loss": 0.19643017649650574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989596245076159, + "compression_loss": 0.0, + "distillation_loss": 0.023733289912343025, + "epoch": 6.25, + "learning_rate": 1.4002785444949928e-06, + "loss": 0.0217, + "step": 6586, + "task_loss": 0.003847165033221245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989613207716714, + "compression_loss": 0.0, + "distillation_loss": 0.028031958267092705, + "epoch": 6.26, + "learning_rate": 1.396764698608155e-06, + "loss": 0.0294, + "step": 6587, + "task_loss": 0.04165419936180115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989630151909591, + "compression_loss": 0.0, + "distillation_loss": 0.0592382550239563, + "epoch": 6.26, + "learning_rate": 1.393255140362687e-06, + "loss": 0.0561, + "step": 6588, + "task_loss": 0.027590807527303696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798964707766483, + "compression_loss": 0.0, + "distillation_loss": 0.04887683689594269, + "epoch": 6.26, + "learning_rate": 1.3897498703961148e-06, + "loss": 0.047, + "step": 6589, + "task_loss": 0.030398232862353325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989663984992464, + "compression_loss": 0.0, + "distillation_loss": 0.12447142601013184, + "epoch": 6.26, + "learning_rate": 1.3862488893451847e-06, + "loss": 0.1271, + "step": 6590, + "task_loss": 0.15074971318244934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989680873902533, + "compression_loss": 0.0, + "distillation_loss": 0.04007259011268616, + "epoch": 6.26, + "learning_rate": 1.3827521978458713e-06, + "loss": 0.0504, + "step": 6591, + "task_loss": 0.14287948608398438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989697744405071, + "compression_loss": 0.0, + "distillation_loss": 0.05581611022353172, + "epoch": 6.26, + "learning_rate": 1.3792597965333581e-06, + "loss": 0.0642, + "step": 6592, + "task_loss": 0.13964907824993134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989714596510117, + "compression_loss": 0.0, + "distillation_loss": 0.024131378158926964, + "epoch": 6.26, + "learning_rate": 1.3757716860420683e-06, + "loss": 0.0303, + "step": 6593, + "task_loss": 0.08553370833396912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989731430227707, + "compression_loss": 0.0, + "distillation_loss": 0.020088283345103264, + "epoch": 6.26, + "learning_rate": 1.3722878670056227e-06, + "loss": 0.0185, + "step": 6594, + "task_loss": 0.003951072692871094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989748245567877, + "compression_loss": 0.0, + "distillation_loss": 0.031761474907398224, + "epoch": 6.26, + "learning_rate": 1.368808340056879e-06, + "loss": 0.0333, + "step": 6595, + "task_loss": 0.04679109528660774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989765042540665, + "compression_loss": 0.0, + "distillation_loss": 0.02415706217288971, + "epoch": 6.26, + "learning_rate": 1.3653331058279122e-06, + "loss": 0.0225, + "step": 6596, + "task_loss": 0.007262144237756729 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989781821156109, + "compression_loss": 0.0, + "distillation_loss": 0.0280197374522686, + "epoch": 6.26, + "learning_rate": 1.361862164950009e-06, + "loss": 0.0287, + "step": 6597, + "task_loss": 0.03486378863453865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989798581424242, + "compression_loss": 0.0, + "distillation_loss": 0.015901973471045494, + "epoch": 6.27, + "learning_rate": 1.35839551805369e-06, + "loss": 0.0156, + "step": 6598, + "task_loss": 0.012705270200967789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989815323355105, + "compression_loss": 0.0, + "distillation_loss": 0.024427423253655434, + "epoch": 6.27, + "learning_rate": 1.354933165768682e-06, + "loss": 0.0236, + "step": 6599, + "task_loss": 0.016438543796539307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989832046958733, + "compression_loss": 0.0, + "distillation_loss": 0.05082686245441437, + "epoch": 6.27, + "learning_rate": 1.3514751087239402e-06, + "loss": 0.0588, + "step": 6600, + "task_loss": 0.13012255728244781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989848752245162, + "compression_loss": 0.0, + "distillation_loss": 0.019267812371253967, + "epoch": 6.27, + "learning_rate": 1.3480213475476344e-06, + "loss": 0.026, + "step": 6601, + "task_loss": 0.08636897802352905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798986543922443, + "compression_loss": 0.0, + "distillation_loss": 0.09770554304122925, + "epoch": 6.27, + "learning_rate": 1.3445718828671655e-06, + "loss": 0.0972, + "step": 6602, + "task_loss": 0.09245851635932922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989882107906573, + "compression_loss": 0.0, + "distillation_loss": 0.03061073273420334, + "epoch": 6.27, + "learning_rate": 1.3411267153091378e-06, + "loss": 0.0339, + "step": 6603, + "task_loss": 0.06348737329244614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989898758301629, + "compression_loss": 0.0, + "distillation_loss": 0.09166794270277023, + "epoch": 6.27, + "learning_rate": 1.3376858454993813e-06, + "loss": 0.1133, + "step": 6604, + "task_loss": 0.30779021978378296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989915390419633, + "compression_loss": 0.0, + "distillation_loss": 0.030499190092086792, + "epoch": 6.27, + "learning_rate": 1.3342492740629541e-06, + "loss": 0.0326, + "step": 6605, + "task_loss": 0.05136849731206894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989932004270623, + "compression_loss": 0.0, + "distillation_loss": 0.05024800822138786, + "epoch": 6.27, + "learning_rate": 1.330817001624124e-06, + "loss": 0.0474, + "step": 6606, + "task_loss": 0.02216871827840805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989948599864637, + "compression_loss": 0.0, + "distillation_loss": 0.029151551425457, + "epoch": 6.27, + "learning_rate": 1.3273890288063722e-06, + "loss": 0.0349, + "step": 6607, + "task_loss": 0.08693638443946838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.798996517721171, + "compression_loss": 0.0, + "distillation_loss": 0.08503906428813934, + "epoch": 6.28, + "learning_rate": 1.323965356232415e-06, + "loss": 0.0872, + "step": 6608, + "task_loss": 0.10668846219778061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989981736321878, + "compression_loss": 0.0, + "distillation_loss": 0.03245946019887924, + "epoch": 6.28, + "learning_rate": 1.3205459845241714e-06, + "loss": 0.0299, + "step": 6609, + "task_loss": 0.006885243579745293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7989998277205181, + "compression_loss": 0.0, + "distillation_loss": 0.0519396997988224, + "epoch": 6.28, + "learning_rate": 1.317130914302797e-06, + "loss": 0.051, + "step": 6610, + "task_loss": 0.04283105209469795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990014799871654, + "compression_loss": 0.0, + "distillation_loss": 0.03950023651123047, + "epoch": 6.28, + "learning_rate": 1.3137201461886434e-06, + "loss": 0.0457, + "step": 6611, + "task_loss": 0.10137784481048584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990031304331333, + "compression_loss": 0.0, + "distillation_loss": 0.03535441309213638, + "epoch": 6.28, + "learning_rate": 1.3103136808013061e-06, + "loss": 0.0381, + "step": 6612, + "task_loss": 0.06290829181671143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990047790594256, + "compression_loss": 0.0, + "distillation_loss": 0.07202986627817154, + "epoch": 6.28, + "learning_rate": 1.3069115187595793e-06, + "loss": 0.0809, + "step": 6613, + "task_loss": 0.1610376387834549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799006425867046, + "compression_loss": 0.0, + "distillation_loss": 0.128701314330101, + "epoch": 6.28, + "learning_rate": 1.3035136606814769e-06, + "loss": 0.1386, + "step": 6614, + "task_loss": 0.22806209325790405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799008070856998, + "compression_loss": 0.0, + "distillation_loss": 0.05727306380867958, + "epoch": 6.28, + "learning_rate": 1.3001201071842466e-06, + "loss": 0.0639, + "step": 6615, + "task_loss": 0.1234469786286354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990097140302854, + "compression_loss": 0.0, + "distillation_loss": 0.029445184394717216, + "epoch": 6.28, + "learning_rate": 1.2967308588843375e-06, + "loss": 0.027, + "step": 6616, + "task_loss": 0.0048278942704200745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799011355387912, + "compression_loss": 0.0, + "distillation_loss": 0.06337439268827438, + "epoch": 6.28, + "learning_rate": 1.2933459163974203e-06, + "loss": 0.077, + "step": 6617, + "task_loss": 0.19938617944717407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990129949308814, + "compression_loss": 0.0, + "distillation_loss": 0.03333733230829239, + "epoch": 6.28, + "learning_rate": 1.2899652803383926e-06, + "loss": 0.0305, + "step": 6618, + "task_loss": 0.00532466359436512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990146326601971, + "compression_loss": 0.0, + "distillation_loss": 0.07982062548398972, + "epoch": 6.29, + "learning_rate": 1.286588951321363e-06, + "loss": 0.0878, + "step": 6619, + "task_loss": 0.1597793698310852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990162685768631, + "compression_loss": 0.0, + "distillation_loss": 0.02913430519402027, + "epoch": 6.29, + "learning_rate": 1.2832169299596546e-06, + "loss": 0.027, + "step": 6620, + "task_loss": 0.00752607174217701 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990179026818829, + "compression_loss": 0.0, + "distillation_loss": 0.05205439403653145, + "epoch": 6.29, + "learning_rate": 1.2798492168658083e-06, + "loss": 0.0606, + "step": 6621, + "task_loss": 0.13726334273815155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990195349762601, + "compression_loss": 0.0, + "distillation_loss": 0.050472185015678406, + "epoch": 6.29, + "learning_rate": 1.2764858126515928e-06, + "loss": 0.0563, + "step": 6622, + "task_loss": 0.10921429097652435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990211654609986, + "compression_loss": 0.0, + "distillation_loss": 0.031548745930194855, + "epoch": 6.29, + "learning_rate": 1.2731267179279832e-06, + "loss": 0.0319, + "step": 6623, + "task_loss": 0.03549543768167496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799022794137102, + "compression_loss": 0.0, + "distillation_loss": 0.06983719766139984, + "epoch": 6.29, + "learning_rate": 1.2697719333051723e-06, + "loss": 0.082, + "step": 6624, + "task_loss": 0.191563218832016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990244210055738, + "compression_loss": 0.0, + "distillation_loss": 0.019796200096607208, + "epoch": 6.29, + "learning_rate": 1.2664214593925776e-06, + "loss": 0.0262, + "step": 6625, + "task_loss": 0.08367738872766495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990260460674179, + "compression_loss": 0.0, + "distillation_loss": 0.038845110684633255, + "epoch": 6.29, + "learning_rate": 1.263075296798824e-06, + "loss": 0.052, + "step": 6626, + "task_loss": 0.17067357897758484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990276693236379, + "compression_loss": 0.0, + "distillation_loss": 0.029467133805155754, + "epoch": 6.29, + "learning_rate": 1.2597334461317667e-06, + "loss": 0.0382, + "step": 6627, + "task_loss": 0.11635088920593262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990292907752375, + "compression_loss": 0.0, + "distillation_loss": 0.0395752415060997, + "epoch": 6.29, + "learning_rate": 1.2563959079984588e-06, + "loss": 0.037, + "step": 6628, + "task_loss": 0.014232108369469643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990309104232205, + "compression_loss": 0.0, + "distillation_loss": 0.024423949420452118, + "epoch": 6.3, + "learning_rate": 1.2530626830051878e-06, + "loss": 0.0302, + "step": 6629, + "task_loss": 0.08202096819877625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990325282685903, + "compression_loss": 0.0, + "distillation_loss": 0.03499576449394226, + "epoch": 6.3, + "learning_rate": 1.249733771757447e-06, + "loss": 0.0361, + "step": 6630, + "task_loss": 0.045992329716682434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990341443123508, + "compression_loss": 0.0, + "distillation_loss": 0.11989525705575943, + "epoch": 6.3, + "learning_rate": 1.2464091748599443e-06, + "loss": 0.1271, + "step": 6631, + "task_loss": 0.19175073504447937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990357585555057, + "compression_loss": 0.0, + "distillation_loss": 0.030376242473721504, + "epoch": 6.3, + "learning_rate": 1.243088892916619e-06, + "loss": 0.0319, + "step": 6632, + "task_loss": 0.04513759911060333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990373709990585, + "compression_loss": 0.0, + "distillation_loss": 0.10441594570875168, + "epoch": 6.3, + "learning_rate": 1.239772926530608e-06, + "loss": 0.1108, + "step": 6633, + "task_loss": 0.16776102781295776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799038981644013, + "compression_loss": 0.0, + "distillation_loss": 0.04732033982872963, + "epoch": 6.3, + "learning_rate": 1.2364612763042793e-06, + "loss": 0.0544, + "step": 6634, + "task_loss": 0.11861524730920792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990405904913729, + "compression_loss": 0.0, + "distillation_loss": 0.022771336138248444, + "epoch": 6.3, + "learning_rate": 1.2331539428391963e-06, + "loss": 0.021, + "step": 6635, + "task_loss": 0.005372023209929466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990421975421418, + "compression_loss": 0.0, + "distillation_loss": 0.06504938006401062, + "epoch": 6.3, + "learning_rate": 1.2298509267361702e-06, + "loss": 0.0743, + "step": 6636, + "task_loss": 0.1580170840024948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990438027973236, + "compression_loss": 0.0, + "distillation_loss": 0.04712830111384392, + "epoch": 6.3, + "learning_rate": 1.2265522285952013e-06, + "loss": 0.051, + "step": 6637, + "task_loss": 0.08559815585613251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990454062579216, + "compression_loss": 0.0, + "distillation_loss": 0.024217963218688965, + "epoch": 6.3, + "learning_rate": 1.2232578490155105e-06, + "loss": 0.0226, + "step": 6638, + "task_loss": 0.0075660087168216705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990470079249398, + "compression_loss": 0.0, + "distillation_loss": 0.021058350801467896, + "epoch": 6.3, + "learning_rate": 1.219967788595544e-06, + "loss": 0.0246, + "step": 6639, + "task_loss": 0.05632723867893219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990486077993818, + "compression_loss": 0.0, + "distillation_loss": 0.056007929146289825, + "epoch": 6.31, + "learning_rate": 1.2166820479329572e-06, + "loss": 0.0745, + "step": 6640, + "task_loss": 0.24108853936195374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990502058822513, + "compression_loss": 0.0, + "distillation_loss": 0.02392977848649025, + "epoch": 6.31, + "learning_rate": 1.2134006276246169e-06, + "loss": 0.0225, + "step": 6641, + "task_loss": 0.00946834497153759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990518021745519, + "compression_loss": 0.0, + "distillation_loss": 0.027623049914836884, + "epoch": 6.31, + "learning_rate": 1.2101235282666045e-06, + "loss": 0.026, + "step": 6642, + "task_loss": 0.011274173855781555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990533966772873, + "compression_loss": 0.0, + "distillation_loss": 0.04122261703014374, + "epoch": 6.31, + "learning_rate": 1.2068507504542332e-06, + "loss": 0.0483, + "step": 6643, + "task_loss": 0.11227674782276154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990549893914611, + "compression_loss": 0.0, + "distillation_loss": 0.014780269004404545, + "epoch": 6.31, + "learning_rate": 1.2035822947820074e-06, + "loss": 0.0215, + "step": 6644, + "task_loss": 0.08238159120082855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990565803180774, + "compression_loss": 0.0, + "distillation_loss": 0.10595270246267319, + "epoch": 6.31, + "learning_rate": 1.200318161843661e-06, + "loss": 0.1129, + "step": 6645, + "task_loss": 0.17518508434295654 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990581694581393, + "compression_loss": 0.0, + "distillation_loss": 0.04436454176902771, + "epoch": 6.31, + "learning_rate": 1.1970583522321472e-06, + "loss": 0.0475, + "step": 6646, + "task_loss": 0.07612089067697525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990597568126508, + "compression_loss": 0.0, + "distillation_loss": 0.028508765622973442, + "epoch": 6.31, + "learning_rate": 1.1938028665396173e-06, + "loss": 0.0261, + "step": 6647, + "task_loss": 0.003986643627285957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990613423826156, + "compression_loss": 0.0, + "distillation_loss": 0.12394885718822479, + "epoch": 6.31, + "learning_rate": 1.190551705357451e-06, + "loss": 0.1199, + "step": 6648, + "task_loss": 0.08359791338443756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990629261690374, + "compression_loss": 0.0, + "distillation_loss": 0.0263461172580719, + "epoch": 6.31, + "learning_rate": 1.187304869276229e-06, + "loss": 0.0244, + "step": 6649, + "task_loss": 0.006703455001115799 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990645081729196, + "compression_loss": 0.0, + "distillation_loss": 0.04055074229836464, + "epoch": 6.32, + "learning_rate": 1.184062358885768e-06, + "loss": 0.0377, + "step": 6650, + "task_loss": 0.012343864887952805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990660883952663, + "compression_loss": 0.0, + "distillation_loss": 0.03743916004896164, + "epoch": 6.32, + "learning_rate": 1.1808241747750748e-06, + "loss": 0.0488, + "step": 6651, + "task_loss": 0.1512567400932312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990676668370809, + "compression_loss": 0.0, + "distillation_loss": 0.05502176284790039, + "epoch": 6.32, + "learning_rate": 1.1775903175323787e-06, + "loss": 0.0604, + "step": 6652, + "task_loss": 0.10876131802797318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799069243499367, + "compression_loss": 0.0, + "distillation_loss": 0.026486551389098167, + "epoch": 6.32, + "learning_rate": 1.174360787745138e-06, + "loss": 0.0318, + "step": 6653, + "task_loss": 0.07932649552822113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990708183831287, + "compression_loss": 0.0, + "distillation_loss": 0.09362354874610901, + "epoch": 6.32, + "learning_rate": 1.1711355860000079e-06, + "loss": 0.091, + "step": 6654, + "task_loss": 0.06723588705062866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990723914893691, + "compression_loss": 0.0, + "distillation_loss": 0.13072776794433594, + "epoch": 6.32, + "learning_rate": 1.167914712882856e-06, + "loss": 0.134, + "step": 6655, + "task_loss": 0.1634681522846222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990739628190924, + "compression_loss": 0.0, + "distillation_loss": 0.014383634552359581, + "epoch": 6.32, + "learning_rate": 1.1646981689787728e-06, + "loss": 0.0228, + "step": 6656, + "task_loss": 0.09892392158508301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990755323733021, + "compression_loss": 0.0, + "distillation_loss": 0.013216648250818253, + "epoch": 6.32, + "learning_rate": 1.1614859548720603e-06, + "loss": 0.0251, + "step": 6657, + "task_loss": 0.13216069340705872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990771001530017, + "compression_loss": 0.0, + "distillation_loss": 0.06353971362113953, + "epoch": 6.32, + "learning_rate": 1.1582780711462321e-06, + "loss": 0.0629, + "step": 6658, + "task_loss": 0.057146959006786346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990786661591952, + "compression_loss": 0.0, + "distillation_loss": 0.06731215119361877, + "epoch": 6.32, + "learning_rate": 1.1550745183840139e-06, + "loss": 0.0659, + "step": 6659, + "task_loss": 0.05347077175974846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799080230392886, + "compression_loss": 0.0, + "distillation_loss": 0.018060829490423203, + "epoch": 6.32, + "learning_rate": 1.1518752971673485e-06, + "loss": 0.0169, + "step": 6660, + "task_loss": 0.006073372438549995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799081792855078, + "compression_loss": 0.0, + "distillation_loss": 0.05580337345600128, + "epoch": 6.33, + "learning_rate": 1.1486804080773877e-06, + "loss": 0.0684, + "step": 6661, + "task_loss": 0.18163037300109863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990833535467747, + "compression_loss": 0.0, + "distillation_loss": 0.07261084020137787, + "epoch": 6.33, + "learning_rate": 1.1454898516945035e-06, + "loss": 0.0715, + "step": 6662, + "task_loss": 0.061981115490198135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79908491246898, + "compression_loss": 0.0, + "distillation_loss": 0.031433992087841034, + "epoch": 6.33, + "learning_rate": 1.142303628598268e-06, + "loss": 0.0484, + "step": 6663, + "task_loss": 0.20125338435173035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990864696226974, + "compression_loss": 0.0, + "distillation_loss": 0.057327330112457275, + "epoch": 6.33, + "learning_rate": 1.1391217393674825e-06, + "loss": 0.0524, + "step": 6664, + "task_loss": 0.008000411093235016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990880250089306, + "compression_loss": 0.0, + "distillation_loss": 0.02260660007596016, + "epoch": 6.33, + "learning_rate": 1.1359441845801483e-06, + "loss": 0.0213, + "step": 6665, + "task_loss": 0.00976894237101078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990895786286834, + "compression_loss": 0.0, + "distillation_loss": 0.019381048157811165, + "epoch": 6.33, + "learning_rate": 1.1327709648134787e-06, + "loss": 0.0255, + "step": 6666, + "task_loss": 0.08072338998317719 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990911304829592, + "compression_loss": 0.0, + "distillation_loss": 0.042968470603227615, + "epoch": 6.33, + "learning_rate": 1.1296020806439128e-06, + "loss": 0.0399, + "step": 6667, + "task_loss": 0.012241264805197716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990926805727621, + "compression_loss": 0.0, + "distillation_loss": 0.05627443641424179, + "epoch": 6.33, + "learning_rate": 1.1264375326470926e-06, + "loss": 0.0537, + "step": 6668, + "task_loss": 0.030838757753372192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990942288990954, + "compression_loss": 0.0, + "distillation_loss": 0.043274518102407455, + "epoch": 6.33, + "learning_rate": 1.1232773213978642e-06, + "loss": 0.0483, + "step": 6669, + "task_loss": 0.09389711171388626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799095775462963, + "compression_loss": 0.0, + "distillation_loss": 0.027110418304800987, + "epoch": 6.33, + "learning_rate": 1.1201214474703043e-06, + "loss": 0.0259, + "step": 6670, + "task_loss": 0.014662139117717743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990973202653685, + "compression_loss": 0.0, + "distillation_loss": 0.022032059729099274, + "epoch": 6.34, + "learning_rate": 1.1169699114376931e-06, + "loss": 0.031, + "step": 6671, + "task_loss": 0.11220581084489822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7990988633073157, + "compression_loss": 0.0, + "distillation_loss": 0.014706656336784363, + "epoch": 6.34, + "learning_rate": 1.1138227138725171e-06, + "loss": 0.0135, + "step": 6672, + "task_loss": 0.0031169112771749496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991004045898081, + "compression_loss": 0.0, + "distillation_loss": 0.030809402465820312, + "epoch": 6.34, + "learning_rate": 1.1106798553464804e-06, + "loss": 0.0434, + "step": 6673, + "task_loss": 0.15683190524578094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991019441138495, + "compression_loss": 0.0, + "distillation_loss": 0.12929940223693848, + "epoch": 6.34, + "learning_rate": 1.1075413364305037e-06, + "loss": 0.1301, + "step": 6674, + "task_loss": 0.1377396285533905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991034818804436, + "compression_loss": 0.0, + "distillation_loss": 0.017488310113549232, + "epoch": 6.34, + "learning_rate": 1.1044071576947118e-06, + "loss": 0.016, + "step": 6675, + "task_loss": 0.0023362338542938232 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799105017890594, + "compression_loss": 0.0, + "distillation_loss": 0.024973999708890915, + "epoch": 6.34, + "learning_rate": 1.101277319708438e-06, + "loss": 0.0233, + "step": 6676, + "task_loss": 0.007980858907103539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991065521453045, + "compression_loss": 0.0, + "distillation_loss": 0.0375574491918087, + "epoch": 6.34, + "learning_rate": 1.0981518230402387e-06, + "loss": 0.0393, + "step": 6677, + "task_loss": 0.05460818111896515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991080846455786, + "compression_loss": 0.0, + "distillation_loss": 0.04282548278570175, + "epoch": 6.34, + "learning_rate": 1.0950306682578709e-06, + "loss": 0.041, + "step": 6678, + "task_loss": 0.024096237495541573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991096153924201, + "compression_loss": 0.0, + "distillation_loss": 0.08127694576978683, + "epoch": 6.34, + "learning_rate": 1.0919138559283143e-06, + "loss": 0.0851, + "step": 6679, + "task_loss": 0.11989080905914307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991111443868326, + "compression_loss": 0.0, + "distillation_loss": 0.025165293365716934, + "epoch": 6.34, + "learning_rate": 1.0888013866177437e-06, + "loss": 0.037, + "step": 6680, + "task_loss": 0.1433994174003601 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991126716298199, + "compression_loss": 0.0, + "distillation_loss": 0.027159132063388824, + "epoch": 6.34, + "learning_rate": 1.0856932608915627e-06, + "loss": 0.0298, + "step": 6681, + "task_loss": 0.05401374772191048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991141971223857, + "compression_loss": 0.0, + "distillation_loss": 0.13656893372535706, + "epoch": 6.35, + "learning_rate": 1.0825894793143721e-06, + "loss": 0.1396, + "step": 6682, + "task_loss": 0.16644863784313202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991157208655336, + "compression_loss": 0.0, + "distillation_loss": 0.05064243823289871, + "epoch": 6.35, + "learning_rate": 1.0794900424499876e-06, + "loss": 0.0506, + "step": 6683, + "task_loss": 0.05057719722390175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991172428602672, + "compression_loss": 0.0, + "distillation_loss": 0.07734186947345734, + "epoch": 6.35, + "learning_rate": 1.0763949508614423e-06, + "loss": 0.0739, + "step": 6684, + "task_loss": 0.04286087676882744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991187631075903, + "compression_loss": 0.0, + "distillation_loss": 0.08190207928419113, + "epoch": 6.35, + "learning_rate": 1.0733042051109726e-06, + "loss": 0.0776, + "step": 6685, + "task_loss": 0.038602665066719055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991202816085067, + "compression_loss": 0.0, + "distillation_loss": 0.04311536252498627, + "epoch": 6.35, + "learning_rate": 1.070217805760021e-06, + "loss": 0.0555, + "step": 6686, + "task_loss": 0.16687297821044922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991217983640198, + "compression_loss": 0.0, + "distillation_loss": 0.03107578307390213, + "epoch": 6.35, + "learning_rate": 1.0671357533692554e-06, + "loss": 0.0346, + "step": 6687, + "task_loss": 0.06593281030654907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991233133751334, + "compression_loss": 0.0, + "distillation_loss": 0.023783832788467407, + "epoch": 6.35, + "learning_rate": 1.0640580484985424e-06, + "loss": 0.027, + "step": 6688, + "task_loss": 0.05620795860886574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991248266428512, + "compression_loss": 0.0, + "distillation_loss": 0.09371946007013321, + "epoch": 6.35, + "learning_rate": 1.0609846917069622e-06, + "loss": 0.088, + "step": 6689, + "task_loss": 0.03661830723285675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799126338168177, + "compression_loss": 0.0, + "distillation_loss": 0.22859099507331848, + "epoch": 6.35, + "learning_rate": 1.0579156835528015e-06, + "loss": 0.2157, + "step": 6690, + "task_loss": 0.09983595460653305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991278479521143, + "compression_loss": 0.0, + "distillation_loss": 0.0681435763835907, + "epoch": 6.35, + "learning_rate": 1.0548510245935673e-06, + "loss": 0.0716, + "step": 6691, + "task_loss": 0.10267331451177597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991293559956669, + "compression_loss": 0.0, + "distillation_loss": 0.029233068227767944, + "epoch": 6.36, + "learning_rate": 1.051790715385964e-06, + "loss": 0.0305, + "step": 6692, + "task_loss": 0.041752371937036514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991308622998384, + "compression_loss": 0.0, + "distillation_loss": 0.02068510465323925, + "epoch": 6.36, + "learning_rate": 1.0487347564859113e-06, + "loss": 0.0214, + "step": 6693, + "task_loss": 0.02766001597046852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991323668656325, + "compression_loss": 0.0, + "distillation_loss": 0.009869755245745182, + "epoch": 6.36, + "learning_rate": 1.0456831484485423e-06, + "loss": 0.0093, + "step": 6694, + "task_loss": 0.003924252465367317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991338696940529, + "compression_loss": 0.0, + "distillation_loss": 0.06949397921562195, + "epoch": 6.36, + "learning_rate": 1.0426358918281948e-06, + "loss": 0.0649, + "step": 6695, + "task_loss": 0.023511435836553574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991353707861033, + "compression_loss": 0.0, + "distillation_loss": 0.03212964907288551, + "epoch": 6.36, + "learning_rate": 1.0395929871784144e-06, + "loss": 0.0365, + "step": 6696, + "task_loss": 0.07626573741436005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991368701427873, + "compression_loss": 0.0, + "distillation_loss": 0.08683188259601593, + "epoch": 6.36, + "learning_rate": 1.0365544350519646e-06, + "loss": 0.0829, + "step": 6697, + "task_loss": 0.047511257231235504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991383677651087, + "compression_loss": 0.0, + "distillation_loss": 0.031793013215065, + "epoch": 6.36, + "learning_rate": 1.0335202360008124e-06, + "loss": 0.0338, + "step": 6698, + "task_loss": 0.051480717957019806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991398636540712, + "compression_loss": 0.0, + "distillation_loss": 0.024651937186717987, + "epoch": 6.36, + "learning_rate": 1.0304903905761332e-06, + "loss": 0.0227, + "step": 6699, + "task_loss": 0.0050739627331495285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991413578106783, + "compression_loss": 0.0, + "distillation_loss": 0.03288833424448967, + "epoch": 6.36, + "learning_rate": 1.0274648993283093e-06, + "loss": 0.0333, + "step": 6700, + "task_loss": 0.03704278543591499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991428502359338, + "compression_loss": 0.0, + "distillation_loss": 0.07430990785360336, + "epoch": 6.36, + "learning_rate": 1.0244437628069425e-06, + "loss": 0.0758, + "step": 6701, + "task_loss": 0.08946188539266586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991443409308414, + "compression_loss": 0.0, + "distillation_loss": 0.031692154705524445, + "epoch": 6.36, + "learning_rate": 1.0214269815608358e-06, + "loss": 0.0424, + "step": 6702, + "task_loss": 0.1384189873933792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991458298964047, + "compression_loss": 0.0, + "distillation_loss": 0.03650377690792084, + "epoch": 6.37, + "learning_rate": 1.018414556137995e-06, + "loss": 0.0546, + "step": 6703, + "task_loss": 0.21728603541851044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991473171336275, + "compression_loss": 0.0, + "distillation_loss": 0.14218676090240479, + "epoch": 6.37, + "learning_rate": 1.015406487085646e-06, + "loss": 0.1351, + "step": 6704, + "task_loss": 0.07109387218952179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991488026435134, + "compression_loss": 0.0, + "distillation_loss": 0.08769318461418152, + "epoch": 6.37, + "learning_rate": 1.0124027749502246e-06, + "loss": 0.092, + "step": 6705, + "task_loss": 0.13085408508777618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991502864270661, + "compression_loss": 0.0, + "distillation_loss": 0.0496244877576828, + "epoch": 6.37, + "learning_rate": 1.0094034202773634e-06, + "loss": 0.0473, + "step": 6706, + "task_loss": 0.026335952803492546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991517684852892, + "compression_loss": 0.0, + "distillation_loss": 0.058904923498630524, + "epoch": 6.37, + "learning_rate": 1.006408423611907e-06, + "loss": 0.0619, + "step": 6707, + "task_loss": 0.08892402052879333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991532488191866, + "compression_loss": 0.0, + "distillation_loss": 0.03160509839653969, + "epoch": 6.37, + "learning_rate": 1.0034177854979205e-06, + "loss": 0.0358, + "step": 6708, + "task_loss": 0.07310265302658081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991547274297618, + "compression_loss": 0.0, + "distillation_loss": 0.02190888673067093, + "epoch": 6.37, + "learning_rate": 1.0004315064786608e-06, + "loss": 0.0332, + "step": 6709, + "task_loss": 0.134353369474411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991562043180185, + "compression_loss": 0.0, + "distillation_loss": 0.02776992879807949, + "epoch": 6.37, + "learning_rate": 9.974495870965967e-07, + "loss": 0.0338, + "step": 6710, + "task_loss": 0.08767490833997726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991576794849604, + "compression_loss": 0.0, + "distillation_loss": 0.03906933218240738, + "epoch": 6.37, + "learning_rate": 9.944720278934171e-07, + "loss": 0.0357, + "step": 6711, + "task_loss": 0.005863867700099945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991591529315912, + "compression_loss": 0.0, + "distillation_loss": 0.05178176239132881, + "epoch": 6.37, + "learning_rate": 9.914988294100063e-07, + "loss": 0.0492, + "step": 6712, + "task_loss": 0.026018494740128517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991606246589146, + "compression_loss": 0.0, + "distillation_loss": 0.018913403153419495, + "epoch": 6.38, + "learning_rate": 9.885299921864543e-07, + "loss": 0.0217, + "step": 6713, + "task_loss": 0.04700911417603493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991620946679343, + "compression_loss": 0.0, + "distillation_loss": 0.045953452587127686, + "epoch": 6.38, + "learning_rate": 9.855655167620715e-07, + "loss": 0.0522, + "step": 6714, + "task_loss": 0.10820292681455612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991635629596539, + "compression_loss": 0.0, + "distillation_loss": 0.09742830693721771, + "epoch": 6.38, + "learning_rate": 9.826054036753713e-07, + "loss": 0.092, + "step": 6715, + "task_loss": 0.04326290637254715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991650295350771, + "compression_loss": 0.0, + "distillation_loss": 0.03743236884474754, + "epoch": 6.38, + "learning_rate": 9.796496534640653e-07, + "loss": 0.0345, + "step": 6716, + "task_loss": 0.008360574021935463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991664943952076, + "compression_loss": 0.0, + "distillation_loss": 0.060953132808208466, + "epoch": 6.38, + "learning_rate": 9.766982666650826e-07, + "loss": 0.0634, + "step": 6717, + "task_loss": 0.08522552996873856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991679575410491, + "compression_loss": 0.0, + "distillation_loss": 0.047717705368995667, + "epoch": 6.38, + "learning_rate": 9.737512438145579e-07, + "loss": 0.053, + "step": 6718, + "task_loss": 0.10008193552494049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991694189736052, + "compression_loss": 0.0, + "distillation_loss": 0.024083293974399567, + "epoch": 6.38, + "learning_rate": 9.708085854478327e-07, + "loss": 0.022, + "step": 6719, + "task_loss": 0.0036613382399082184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991708786938797, + "compression_loss": 0.0, + "distillation_loss": 0.05113540589809418, + "epoch": 6.38, + "learning_rate": 9.678702920994543e-07, + "loss": 0.0567, + "step": 6720, + "task_loss": 0.1063266173005104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991723367028762, + "compression_loss": 0.0, + "distillation_loss": 0.028987891972064972, + "epoch": 6.38, + "learning_rate": 9.649363643031733e-07, + "loss": 0.0268, + "step": 6721, + "task_loss": 0.007400134578347206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991737930015985, + "compression_loss": 0.0, + "distillation_loss": 0.04050924628973007, + "epoch": 6.38, + "learning_rate": 9.620068025919583e-07, + "loss": 0.0462, + "step": 6722, + "task_loss": 0.097807377576828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991752475910502, + "compression_loss": 0.0, + "distillation_loss": 0.0265050008893013, + "epoch": 6.38, + "learning_rate": 9.590816074979774e-07, + "loss": 0.0258, + "step": 6723, + "task_loss": 0.019627349451184273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991767004722349, + "compression_loss": 0.0, + "distillation_loss": 0.10371723771095276, + "epoch": 6.39, + "learning_rate": 9.561607795526007e-07, + "loss": 0.1172, + "step": 6724, + "task_loss": 0.2384086400270462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991781516461564, + "compression_loss": 0.0, + "distillation_loss": 0.033512018620967865, + "epoch": 6.39, + "learning_rate": 9.532443192864199e-07, + "loss": 0.0316, + "step": 6725, + "task_loss": 0.013961471617221832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991796011138184, + "compression_loss": 0.0, + "distillation_loss": 0.03146098554134369, + "epoch": 6.39, + "learning_rate": 9.50332227229217e-07, + "loss": 0.0535, + "step": 6726, + "task_loss": 0.2520584762096405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991810488762244, + "compression_loss": 0.0, + "distillation_loss": 0.026955943554639816, + "epoch": 6.39, + "learning_rate": 9.474245039099882e-07, + "loss": 0.0253, + "step": 6727, + "task_loss": 0.010826632380485535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991824949343782, + "compression_loss": 0.0, + "distillation_loss": 0.024500368162989616, + "epoch": 6.39, + "learning_rate": 9.445211498569362e-07, + "loss": 0.0334, + "step": 6728, + "task_loss": 0.11393891274929047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991839392892836, + "compression_loss": 0.0, + "distillation_loss": 0.05089277774095535, + "epoch": 6.39, + "learning_rate": 9.416221655974722e-07, + "loss": 0.0473, + "step": 6729, + "task_loss": 0.014700580388307571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991853819419441, + "compression_loss": 0.0, + "distillation_loss": 0.01643327623605728, + "epoch": 6.39, + "learning_rate": 9.387275516582056e-07, + "loss": 0.0304, + "step": 6730, + "task_loss": 0.1562596559524536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991868228933635, + "compression_loss": 0.0, + "distillation_loss": 0.07891779392957687, + "epoch": 6.39, + "learning_rate": 9.358373085649602e-07, + "loss": 0.0943, + "step": 6731, + "task_loss": 0.23281759023666382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991882621445454, + "compression_loss": 0.0, + "distillation_loss": 0.020287442952394485, + "epoch": 6.39, + "learning_rate": 9.329514368427633e-07, + "loss": 0.0234, + "step": 6732, + "task_loss": 0.051706451922655106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991896996964936, + "compression_loss": 0.0, + "distillation_loss": 0.0532609187066555, + "epoch": 6.39, + "learning_rate": 9.300699370158456e-07, + "loss": 0.0599, + "step": 6733, + "task_loss": 0.11954209208488464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991911355502115, + "compression_loss": 0.0, + "distillation_loss": 0.037625864148139954, + "epoch": 6.4, + "learning_rate": 9.271928096076493e-07, + "loss": 0.0411, + "step": 6734, + "task_loss": 0.07242215424776077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991925697067032, + "compression_loss": 0.0, + "distillation_loss": 0.02129533141851425, + "epoch": 6.4, + "learning_rate": 9.243200551408094e-07, + "loss": 0.0389, + "step": 6735, + "task_loss": 0.19778040051460266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799194002166972, + "compression_loss": 0.0, + "distillation_loss": 0.030612638220191002, + "epoch": 6.4, + "learning_rate": 9.214516741371831e-07, + "loss": 0.0464, + "step": 6736, + "task_loss": 0.18832482397556305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991954329320218, + "compression_loss": 0.0, + "distillation_loss": 0.01869923248887062, + "epoch": 6.4, + "learning_rate": 9.185876671178262e-07, + "loss": 0.0174, + "step": 6737, + "task_loss": 0.005990633741021156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991968620028562, + "compression_loss": 0.0, + "distillation_loss": 0.02486245334148407, + "epoch": 6.4, + "learning_rate": 9.157280346029918e-07, + "loss": 0.0232, + "step": 6738, + "task_loss": 0.00861707329750061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799198289380479, + "compression_loss": 0.0, + "distillation_loss": 0.03501253202557564, + "epoch": 6.4, + "learning_rate": 9.128727771121531e-07, + "loss": 0.0323, + "step": 6739, + "task_loss": 0.007670162245631218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7991997150658937, + "compression_loss": 0.0, + "distillation_loss": 0.041302017867565155, + "epoch": 6.4, + "learning_rate": 9.100218951639816e-07, + "loss": 0.0531, + "step": 6740, + "task_loss": 0.15916121006011963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992011390601041, + "compression_loss": 0.0, + "distillation_loss": 0.056890472769737244, + "epoch": 6.4, + "learning_rate": 9.071753892763519e-07, + "loss": 0.0589, + "step": 6741, + "task_loss": 0.0772441178560257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992025613641139, + "compression_loss": 0.0, + "distillation_loss": 0.02093365229666233, + "epoch": 6.4, + "learning_rate": 9.043332599663418e-07, + "loss": 0.0277, + "step": 6742, + "task_loss": 0.08839461952447891 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992039819789267, + "compression_loss": 0.0, + "distillation_loss": 0.09711463004350662, + "epoch": 6.4, + "learning_rate": 9.014955077502413e-07, + "loss": 0.1142, + "step": 6743, + "task_loss": 0.268157958984375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992054009055463, + "compression_loss": 0.0, + "distillation_loss": 0.0171291995793581, + "epoch": 6.4, + "learning_rate": 8.986621331435435e-07, + "loss": 0.0159, + "step": 6744, + "task_loss": 0.004604209214448929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992068181449762, + "compression_loss": 0.0, + "distillation_loss": 0.09282074868679047, + "epoch": 6.41, + "learning_rate": 8.958331366609423e-07, + "loss": 0.0897, + "step": 6745, + "task_loss": 0.061169348657131195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992082336982201, + "compression_loss": 0.0, + "distillation_loss": 0.07672721892595291, + "epoch": 6.41, + "learning_rate": 8.930085188163378e-07, + "loss": 0.08, + "step": 6746, + "task_loss": 0.1096218079328537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799209647566282, + "compression_loss": 0.0, + "distillation_loss": 0.02508268877863884, + "epoch": 6.41, + "learning_rate": 8.90188280122839e-07, + "loss": 0.0238, + "step": 6747, + "task_loss": 0.012722663581371307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992110597501652, + "compression_loss": 0.0, + "distillation_loss": 0.024517200887203217, + "epoch": 6.41, + "learning_rate": 8.87372421092747e-07, + "loss": 0.0225, + "step": 6748, + "task_loss": 0.00447007454931736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992124702508735, + "compression_loss": 0.0, + "distillation_loss": 0.01891644299030304, + "epoch": 6.41, + "learning_rate": 8.845609422375861e-07, + "loss": 0.0175, + "step": 6749, + "task_loss": 0.00444909930229187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992138790694107, + "compression_loss": 0.0, + "distillation_loss": 0.02350194752216339, + "epoch": 6.41, + "learning_rate": 8.817538440680728e-07, + "loss": 0.0224, + "step": 6750, + "task_loss": 0.01221693865954876 + }, + { + "epoch": 6.41, + "eval_accuracy": 0.8910550458715596, + "eval_loss": 0.43947499990463257, + "eval_runtime": 18.2778, + "eval_samples_per_second": 47.708, + "eval_steps_per_second": 5.964, + "step": 6750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992152862067803, + "compression_loss": 0.0, + "distillation_loss": 0.03163065388798714, + "epoch": 6.41, + "learning_rate": 8.78951127094127e-07, + "loss": 0.0336, + "step": 6751, + "task_loss": 0.05086345970630646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992166916639861, + "compression_loss": 0.0, + "distillation_loss": 0.10874065011739731, + "epoch": 6.41, + "learning_rate": 8.761527918248775e-07, + "loss": 0.1034, + "step": 6752, + "task_loss": 0.05520922690629959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992180954420318, + "compression_loss": 0.0, + "distillation_loss": 0.02465911954641342, + "epoch": 6.41, + "learning_rate": 8.733588387686537e-07, + "loss": 0.0227, + "step": 6753, + "task_loss": 0.005142947658896446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992194975419211, + "compression_loss": 0.0, + "distillation_loss": 0.07918893545866013, + "epoch": 6.41, + "learning_rate": 8.705692684329969e-07, + "loss": 0.0822, + "step": 6754, + "task_loss": 0.10922683775424957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992208979646576, + "compression_loss": 0.0, + "distillation_loss": 0.030382784083485603, + "epoch": 6.42, + "learning_rate": 8.677840813246352e-07, + "loss": 0.0294, + "step": 6755, + "task_loss": 0.020388955250382423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992222967112449, + "compression_loss": 0.0, + "distillation_loss": 0.035736992955207825, + "epoch": 6.42, + "learning_rate": 8.650032779495165e-07, + "loss": 0.0331, + "step": 6756, + "task_loss": 0.009340623393654823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992236937826869, + "compression_loss": 0.0, + "distillation_loss": 0.045183077454566956, + "epoch": 6.42, + "learning_rate": 8.622268588127924e-07, + "loss": 0.0426, + "step": 6757, + "task_loss": 0.019841017201542854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799225089179987, + "compression_loss": 0.0, + "distillation_loss": 0.0189533494412899, + "epoch": 6.42, + "learning_rate": 8.594548244188067e-07, + "loss": 0.0177, + "step": 6758, + "task_loss": 0.006474364548921585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992264829041492, + "compression_loss": 0.0, + "distillation_loss": 0.024439578875899315, + "epoch": 6.42, + "learning_rate": 8.56687175271112e-07, + "loss": 0.0229, + "step": 6759, + "task_loss": 0.008742136880755424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992278749561771, + "compression_loss": 0.0, + "distillation_loss": 0.026206394657492638, + "epoch": 6.42, + "learning_rate": 8.539239118724701e-07, + "loss": 0.0266, + "step": 6760, + "task_loss": 0.03009379468858242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992292653370742, + "compression_loss": 0.0, + "distillation_loss": 0.016620401293039322, + "epoch": 6.42, + "learning_rate": 8.511650347248406e-07, + "loss": 0.017, + "step": 6761, + "task_loss": 0.02082774229347706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992306540478442, + "compression_loss": 0.0, + "distillation_loss": 0.025677714496850967, + "epoch": 6.42, + "learning_rate": 8.484105443293783e-07, + "loss": 0.0237, + "step": 6762, + "task_loss": 0.005807174369692802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992320410894911, + "compression_loss": 0.0, + "distillation_loss": 0.024803977459669113, + "epoch": 6.42, + "learning_rate": 8.456604411864605e-07, + "loss": 0.0229, + "step": 6763, + "task_loss": 0.005409408360719681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992334264630182, + "compression_loss": 0.0, + "distillation_loss": 0.09612073004245758, + "epoch": 6.42, + "learning_rate": 8.429147257956516e-07, + "loss": 0.0983, + "step": 6764, + "task_loss": 0.11760664731264114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992348101694294, + "compression_loss": 0.0, + "distillation_loss": 0.05043857544660568, + "epoch": 6.42, + "learning_rate": 8.401733986557247e-07, + "loss": 0.0493, + "step": 6765, + "task_loss": 0.03950589895248413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992361922097283, + "compression_loss": 0.0, + "distillation_loss": 0.05412200465798378, + "epoch": 6.43, + "learning_rate": 8.374364602646511e-07, + "loss": 0.0535, + "step": 6766, + "task_loss": 0.04812926799058914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992375725849187, + "compression_loss": 0.0, + "distillation_loss": 0.02848942205309868, + "epoch": 6.43, + "learning_rate": 8.347039111196164e-07, + "loss": 0.0328, + "step": 6767, + "task_loss": 0.07136546820402145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992389512960041, + "compression_loss": 0.0, + "distillation_loss": 0.11072133481502533, + "epoch": 6.43, + "learning_rate": 8.319757517169985e-07, + "loss": 0.12, + "step": 6768, + "task_loss": 0.2039954513311386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992403283439883, + "compression_loss": 0.0, + "distillation_loss": 0.02107694186270237, + "epoch": 6.43, + "learning_rate": 8.29251982552376e-07, + "loss": 0.0194, + "step": 6769, + "task_loss": 0.004783786833286285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992417037298749, + "compression_loss": 0.0, + "distillation_loss": 0.013939835131168365, + "epoch": 6.43, + "learning_rate": 8.265326041205417e-07, + "loss": 0.0129, + "step": 6770, + "task_loss": 0.0032312143594026566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992430774546677, + "compression_loss": 0.0, + "distillation_loss": 0.04151005297899246, + "epoch": 6.43, + "learning_rate": 8.238176169154816e-07, + "loss": 0.0465, + "step": 6771, + "task_loss": 0.09118230640888214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992444495193702, + "compression_loss": 0.0, + "distillation_loss": 0.06526434421539307, + "epoch": 6.43, + "learning_rate": 8.211070214303812e-07, + "loss": 0.0745, + "step": 6772, + "task_loss": 0.15805087983608246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992458199249863, + "compression_loss": 0.0, + "distillation_loss": 0.024169165641069412, + "epoch": 6.43, + "learning_rate": 8.184008181576386e-07, + "loss": 0.0227, + "step": 6773, + "task_loss": 0.009067356586456299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992471886725196, + "compression_loss": 0.0, + "distillation_loss": 0.029119737446308136, + "epoch": 6.43, + "learning_rate": 8.156990075888466e-07, + "loss": 0.0281, + "step": 6774, + "task_loss": 0.0189868975430727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992485557629738, + "compression_loss": 0.0, + "distillation_loss": 0.03186260536313057, + "epoch": 6.43, + "learning_rate": 8.130015902148042e-07, + "loss": 0.0347, + "step": 6775, + "task_loss": 0.06021621823310852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992499211973525, + "compression_loss": 0.0, + "distillation_loss": 0.02992023155093193, + "epoch": 6.43, + "learning_rate": 8.103085665255084e-07, + "loss": 0.0543, + "step": 6776, + "task_loss": 0.27328985929489136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992512849766594, + "compression_loss": 0.0, + "distillation_loss": 0.04072340205311775, + "epoch": 6.44, + "learning_rate": 8.076199370101594e-07, + "loss": 0.0409, + "step": 6777, + "task_loss": 0.042335450649261475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992526471018983, + "compression_loss": 0.0, + "distillation_loss": 0.04745135456323624, + "epoch": 6.44, + "learning_rate": 8.04935702157164e-07, + "loss": 0.0502, + "step": 6778, + "task_loss": 0.07529226690530777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992540075740727, + "compression_loss": 0.0, + "distillation_loss": 0.04864822328090668, + "epoch": 6.44, + "learning_rate": 8.022558624541182e-07, + "loss": 0.0524, + "step": 6779, + "task_loss": 0.08609342575073242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992553663941864, + "compression_loss": 0.0, + "distillation_loss": 0.11904314160346985, + "epoch": 6.44, + "learning_rate": 7.995804183878353e-07, + "loss": 0.1204, + "step": 6780, + "task_loss": 0.13219860196113586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992567235632431, + "compression_loss": 0.0, + "distillation_loss": 0.0507618710398674, + "epoch": 6.44, + "learning_rate": 7.969093704443209e-07, + "loss": 0.0556, + "step": 6781, + "task_loss": 0.09945084899663925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992580790822463, + "compression_loss": 0.0, + "distillation_loss": 0.08536731451749802, + "epoch": 6.44, + "learning_rate": 7.942427191087786e-07, + "loss": 0.0833, + "step": 6782, + "task_loss": 0.06510312855243683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992594329521999, + "compression_loss": 0.0, + "distillation_loss": 0.09828724712133408, + "epoch": 6.44, + "learning_rate": 7.915804648656239e-07, + "loss": 0.1016, + "step": 6783, + "task_loss": 0.13165351748466492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992607851741076, + "compression_loss": 0.0, + "distillation_loss": 0.06299099326133728, + "epoch": 6.44, + "learning_rate": 7.889226081984696e-07, + "loss": 0.0704, + "step": 6784, + "task_loss": 0.13664023578166962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992621357489729, + "compression_loss": 0.0, + "distillation_loss": 0.017018694430589676, + "epoch": 6.44, + "learning_rate": 7.862691495901243e-07, + "loss": 0.0163, + "step": 6785, + "task_loss": 0.00940592773258686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992634846777995, + "compression_loss": 0.0, + "distillation_loss": 0.08001522719860077, + "epoch": 6.44, + "learning_rate": 7.83620089522602e-07, + "loss": 0.0758, + "step": 6786, + "task_loss": 0.038091909140348434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992648319615913, + "compression_loss": 0.0, + "distillation_loss": 0.013534367084503174, + "epoch": 6.45, + "learning_rate": 7.809754284771181e-07, + "loss": 0.0144, + "step": 6787, + "task_loss": 0.0219335425645113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992661776013518, + "compression_loss": 0.0, + "distillation_loss": 0.03589925542473793, + "epoch": 6.45, + "learning_rate": 7.783351669340882e-07, + "loss": 0.0332, + "step": 6788, + "task_loss": 0.009313993155956268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992675215980846, + "compression_loss": 0.0, + "distillation_loss": 0.044496770948171616, + "epoch": 6.45, + "learning_rate": 7.756993053731259e-07, + "loss": 0.0418, + "step": 6789, + "task_loss": 0.01771704852581024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992688639527935, + "compression_loss": 0.0, + "distillation_loss": 0.025459101423621178, + "epoch": 6.45, + "learning_rate": 7.730678442730538e-07, + "loss": 0.0239, + "step": 6790, + "task_loss": 0.009527930989861488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992702046664822, + "compression_loss": 0.0, + "distillation_loss": 0.02458195760846138, + "epoch": 6.45, + "learning_rate": 7.704407841118811e-07, + "loss": 0.0229, + "step": 6791, + "task_loss": 0.007813390344381332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992715437401545, + "compression_loss": 0.0, + "distillation_loss": 0.01990782469511032, + "epoch": 6.45, + "learning_rate": 7.678181253668343e-07, + "loss": 0.0228, + "step": 6792, + "task_loss": 0.04860430955886841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992728811748139, + "compression_loss": 0.0, + "distillation_loss": 0.024153951555490494, + "epoch": 6.45, + "learning_rate": 7.651998685143269e-07, + "loss": 0.0354, + "step": 6793, + "task_loss": 0.13703292608261108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799274216971464, + "compression_loss": 0.0, + "distillation_loss": 0.02501821331679821, + "epoch": 6.45, + "learning_rate": 7.625860140299811e-07, + "loss": 0.0231, + "step": 6794, + "task_loss": 0.006082385778427124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992755511311087, + "compression_loss": 0.0, + "distillation_loss": 0.06825748831033707, + "epoch": 6.45, + "learning_rate": 7.599765623886146e-07, + "loss": 0.0673, + "step": 6795, + "task_loss": 0.05868315324187279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992768836547516, + "compression_loss": 0.0, + "distillation_loss": 0.040932249277830124, + "epoch": 6.45, + "learning_rate": 7.573715140642451e-07, + "loss": 0.0558, + "step": 6796, + "task_loss": 0.18955527245998383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992782145433963, + "compression_loss": 0.0, + "distillation_loss": 0.03785280883312225, + "epoch": 6.45, + "learning_rate": 7.547708695300942e-07, + "loss": 0.0502, + "step": 6797, + "task_loss": 0.16129517555236816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992795437980466, + "compression_loss": 0.0, + "distillation_loss": 0.02329355478286743, + "epoch": 6.46, + "learning_rate": 7.521746292585841e-07, + "loss": 0.0357, + "step": 6798, + "task_loss": 0.1471756100654602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799280871419706, + "compression_loss": 0.0, + "distillation_loss": 0.03262361139059067, + "epoch": 6.46, + "learning_rate": 7.49582793721329e-07, + "loss": 0.0336, + "step": 6799, + "task_loss": 0.04221324622631073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992821974093786, + "compression_loss": 0.0, + "distillation_loss": 0.03289227932691574, + "epoch": 6.46, + "learning_rate": 7.469953633891469e-07, + "loss": 0.0401, + "step": 6800, + "task_loss": 0.10484490543603897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992835217680675, + "compression_loss": 0.0, + "distillation_loss": 0.07555963844060898, + "epoch": 6.46, + "learning_rate": 7.444123387320645e-07, + "loss": 0.0751, + "step": 6801, + "task_loss": 0.07083379477262497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992848444967768, + "compression_loss": 0.0, + "distillation_loss": 0.03143364191055298, + "epoch": 6.46, + "learning_rate": 7.418337202192982e-07, + "loss": 0.0313, + "step": 6802, + "task_loss": 0.03013058938086033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992861655965101, + "compression_loss": 0.0, + "distillation_loss": 0.03767804428935051, + "epoch": 6.46, + "learning_rate": 7.392595083192622e-07, + "loss": 0.0348, + "step": 6803, + "task_loss": 0.009084422141313553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992874850682711, + "compression_loss": 0.0, + "distillation_loss": 0.03183813393115997, + "epoch": 6.46, + "learning_rate": 7.366897034995796e-07, + "loss": 0.0295, + "step": 6804, + "task_loss": 0.008527562022209167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992888029130634, + "compression_loss": 0.0, + "distillation_loss": 0.022205591201782227, + "epoch": 6.46, + "learning_rate": 7.341243062270686e-07, + "loss": 0.028, + "step": 6805, + "task_loss": 0.0802656039595604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992901191318905, + "compression_loss": 0.0, + "distillation_loss": 0.036016229540109634, + "epoch": 6.46, + "learning_rate": 7.315633169677399e-07, + "loss": 0.0352, + "step": 6806, + "task_loss": 0.027843181043863297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992914337257565, + "compression_loss": 0.0, + "distillation_loss": 0.03833125904202461, + "epoch": 6.46, + "learning_rate": 7.290067361868103e-07, + "loss": 0.0388, + "step": 6807, + "task_loss": 0.0432337149977684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992927466956649, + "compression_loss": 0.0, + "distillation_loss": 0.01719411462545395, + "epoch": 6.47, + "learning_rate": 7.264545643486997e-07, + "loss": 0.0272, + "step": 6808, + "task_loss": 0.11720463633537292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992940580426192, + "compression_loss": 0.0, + "distillation_loss": 0.10508248209953308, + "epoch": 6.47, + "learning_rate": 7.239068019170209e-07, + "loss": 0.1087, + "step": 6809, + "task_loss": 0.14166167378425598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992953677676233, + "compression_loss": 0.0, + "distillation_loss": 0.04110556095838547, + "epoch": 6.47, + "learning_rate": 7.21363449354584e-07, + "loss": 0.0467, + "step": 6810, + "task_loss": 0.09688466787338257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992966758716809, + "compression_loss": 0.0, + "distillation_loss": 0.0683632344007492, + "epoch": 6.47, + "learning_rate": 7.188245071234057e-07, + "loss": 0.0735, + "step": 6811, + "task_loss": 0.11992549151182175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7992979823557955, + "compression_loss": 0.0, + "distillation_loss": 0.03459140658378601, + "epoch": 6.47, + "learning_rate": 7.162899756846975e-07, + "loss": 0.0356, + "step": 6812, + "task_loss": 0.04442333057522774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799299287220971, + "compression_loss": 0.0, + "distillation_loss": 0.05156092718243599, + "epoch": 6.47, + "learning_rate": 7.137598554988633e-07, + "loss": 0.0484, + "step": 6813, + "task_loss": 0.019665861502289772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993005904682109, + "compression_loss": 0.0, + "distillation_loss": 0.07404929399490356, + "epoch": 6.47, + "learning_rate": 7.112341470255163e-07, + "loss": 0.0795, + "step": 6814, + "task_loss": 0.12879236042499542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993018920985189, + "compression_loss": 0.0, + "distillation_loss": 0.055263321846723557, + "epoch": 6.47, + "learning_rate": 7.087128507234642e-07, + "loss": 0.0582, + "step": 6815, + "task_loss": 0.08427795022726059 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993031921128988, + "compression_loss": 0.0, + "distillation_loss": 0.06862097978591919, + "epoch": 6.47, + "learning_rate": 7.061959670507102e-07, + "loss": 0.0758, + "step": 6816, + "task_loss": 0.1407826542854309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993044905123541, + "compression_loss": 0.0, + "distillation_loss": 0.030200235545635223, + "epoch": 6.47, + "learning_rate": 7.036834964644523e-07, + "loss": 0.0385, + "step": 6817, + "task_loss": 0.11273118853569031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993057872978887, + "compression_loss": 0.0, + "distillation_loss": 0.015482475981116295, + "epoch": 6.47, + "learning_rate": 7.011754394211061e-07, + "loss": 0.0144, + "step": 6818, + "task_loss": 0.004941888153553009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993070824705062, + "compression_loss": 0.0, + "distillation_loss": 0.05421324819326401, + "epoch": 6.48, + "learning_rate": 6.986717963762656e-07, + "loss": 0.0614, + "step": 6819, + "task_loss": 0.12607041001319885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993083760312102, + "compression_loss": 0.0, + "distillation_loss": 0.05300523713231087, + "epoch": 6.48, + "learning_rate": 6.961725677847308e-07, + "loss": 0.0534, + "step": 6820, + "task_loss": 0.0573628805577755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993096679810044, + "compression_loss": 0.0, + "distillation_loss": 0.031698837876319885, + "epoch": 6.48, + "learning_rate": 6.936777541004941e-07, + "loss": 0.0293, + "step": 6821, + "task_loss": 0.007633762434124947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993109583208926, + "compression_loss": 0.0, + "distillation_loss": 0.053836889564991, + "epoch": 6.48, + "learning_rate": 6.911873557767568e-07, + "loss": 0.0578, + "step": 6822, + "task_loss": 0.09317664802074432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993122470518784, + "compression_loss": 0.0, + "distillation_loss": 0.02741752751171589, + "epoch": 6.48, + "learning_rate": 6.88701373265907e-07, + "loss": 0.0269, + "step": 6823, + "task_loss": 0.022505706176161766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993135341749654, + "compression_loss": 0.0, + "distillation_loss": 0.05598681792616844, + "epoch": 6.48, + "learning_rate": 6.862198070195363e-07, + "loss": 0.0687, + "step": 6824, + "task_loss": 0.18340638279914856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993148196911575, + "compression_loss": 0.0, + "distillation_loss": 0.060910556465387344, + "epoch": 6.48, + "learning_rate": 6.837426574884342e-07, + "loss": 0.0622, + "step": 6825, + "task_loss": 0.07380431145429611 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993161036014581, + "compression_loss": 0.0, + "distillation_loss": 0.03215963393449783, + "epoch": 6.48, + "learning_rate": 6.812699251225907e-07, + "loss": 0.0296, + "step": 6826, + "task_loss": 0.006164673715829849 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993173859068712, + "compression_loss": 0.0, + "distillation_loss": 0.0726788341999054, + "epoch": 6.48, + "learning_rate": 6.788016103711825e-07, + "loss": 0.0694, + "step": 6827, + "task_loss": 0.039418820291757584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993186666084002, + "compression_loss": 0.0, + "distillation_loss": 0.042292624711990356, + "epoch": 6.48, + "learning_rate": 6.763377136825927e-07, + "loss": 0.039, + "step": 6828, + "task_loss": 0.009175049141049385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993199457070489, + "compression_loss": 0.0, + "distillation_loss": 0.018559550866484642, + "epoch": 6.49, + "learning_rate": 6.738782355044049e-07, + "loss": 0.0244, + "step": 6829, + "task_loss": 0.07653959095478058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799321223203821, + "compression_loss": 0.0, + "distillation_loss": 0.14147916436195374, + "epoch": 6.49, + "learning_rate": 6.714231762833894e-07, + "loss": 0.1379, + "step": 6830, + "task_loss": 0.10579898953437805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993224990997202, + "compression_loss": 0.0, + "distillation_loss": 0.02243266999721527, + "epoch": 6.49, + "learning_rate": 6.689725364655203e-07, + "loss": 0.0242, + "step": 6831, + "task_loss": 0.04005550593137741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993237733957501, + "compression_loss": 0.0, + "distillation_loss": 0.0320717990398407, + "epoch": 6.49, + "learning_rate": 6.665263164959745e-07, + "loss": 0.049, + "step": 6832, + "task_loss": 0.20118948817253113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993250460929144, + "compression_loss": 0.0, + "distillation_loss": 0.02864461950957775, + "epoch": 6.49, + "learning_rate": 6.640845168191107e-07, + "loss": 0.0266, + "step": 6833, + "task_loss": 0.008286131545901299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993263171922169, + "compression_loss": 0.0, + "distillation_loss": 0.014984526671469212, + "epoch": 6.49, + "learning_rate": 6.616471378784961e-07, + "loss": 0.014, + "step": 6834, + "task_loss": 0.0047838687896728516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993275866946612, + "compression_loss": 0.0, + "distillation_loss": 0.0654832050204277, + "epoch": 6.49, + "learning_rate": 6.592141801168933e-07, + "loss": 0.0741, + "step": 6835, + "task_loss": 0.15198421478271484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993288546012509, + "compression_loss": 0.0, + "distillation_loss": 0.030285434797406197, + "epoch": 6.49, + "learning_rate": 6.567856439762654e-07, + "loss": 0.0406, + "step": 6836, + "task_loss": 0.13304871320724487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993301209129897, + "compression_loss": 0.0, + "distillation_loss": 0.052962590008974075, + "epoch": 6.49, + "learning_rate": 6.543615298977623e-07, + "loss": 0.0483, + "step": 6837, + "task_loss": 0.006451290100812912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993313856308815, + "compression_loss": 0.0, + "distillation_loss": 0.07559768110513687, + "epoch": 6.49, + "learning_rate": 6.519418383217347e-07, + "loss": 0.0742, + "step": 6838, + "task_loss": 0.061944808810949326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993326487559297, + "compression_loss": 0.0, + "distillation_loss": 0.042924731969833374, + "epoch": 6.49, + "learning_rate": 6.495265696877361e-07, + "loss": 0.0429, + "step": 6839, + "task_loss": 0.04244181513786316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993339102891381, + "compression_loss": 0.0, + "distillation_loss": 0.021773064509034157, + "epoch": 6.5, + "learning_rate": 6.471157244345105e-07, + "loss": 0.0202, + "step": 6840, + "task_loss": 0.005988283082842827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993351702315104, + "compression_loss": 0.0, + "distillation_loss": 0.01895114779472351, + "epoch": 6.5, + "learning_rate": 6.447093029999935e-07, + "loss": 0.0179, + "step": 6841, + "task_loss": 0.008197702467441559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993364285840503, + "compression_loss": 0.0, + "distillation_loss": 0.06650319695472717, + "epoch": 6.5, + "learning_rate": 6.423073058213325e-07, + "loss": 0.0657, + "step": 6842, + "task_loss": 0.058126937597990036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993376853477614, + "compression_loss": 0.0, + "distillation_loss": 0.030550938099622726, + "epoch": 6.5, + "learning_rate": 6.399097333348536e-07, + "loss": 0.0346, + "step": 6843, + "task_loss": 0.0713091567158699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993389405236474, + "compression_loss": 0.0, + "distillation_loss": 0.01258410420268774, + "epoch": 6.5, + "learning_rate": 6.375165859760946e-07, + "loss": 0.0116, + "step": 6844, + "task_loss": 0.003225104883313179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993401941127121, + "compression_loss": 0.0, + "distillation_loss": 0.04864715039730072, + "epoch": 6.5, + "learning_rate": 6.351278641797742e-07, + "loss": 0.0461, + "step": 6845, + "task_loss": 0.023460451513528824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799341446115959, + "compression_loss": 0.0, + "distillation_loss": 0.10957182943820953, + "epoch": 6.5, + "learning_rate": 6.327435683798233e-07, + "loss": 0.1103, + "step": 6846, + "task_loss": 0.1172788143157959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993426965343919, + "compression_loss": 0.0, + "distillation_loss": 0.050183918327093124, + "epoch": 6.5, + "learning_rate": 6.303636990093592e-07, + "loss": 0.059, + "step": 6847, + "task_loss": 0.13854621350765228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993439453690145, + "compression_loss": 0.0, + "distillation_loss": 0.013963685370981693, + "epoch": 6.5, + "learning_rate": 6.279882565006889e-07, + "loss": 0.0201, + "step": 6848, + "task_loss": 0.07548151910305023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993451926208305, + "compression_loss": 0.0, + "distillation_loss": 0.0308124627918005, + "epoch": 6.5, + "learning_rate": 6.256172412853339e-07, + "loss": 0.0313, + "step": 6849, + "task_loss": 0.0358404815196991 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993464382908434, + "compression_loss": 0.0, + "distillation_loss": 0.006880389526486397, + "epoch": 6.51, + "learning_rate": 6.232506537939941e-07, + "loss": 0.0065, + "step": 6850, + "task_loss": 0.00280972383916378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799347682380057, + "compression_loss": 0.0, + "distillation_loss": 0.03846500813961029, + "epoch": 6.51, + "learning_rate": 6.208884944565702e-07, + "loss": 0.0352, + "step": 6851, + "task_loss": 0.005746711045503616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993489248894751, + "compression_loss": 0.0, + "distillation_loss": 0.0295537281781435, + "epoch": 6.51, + "learning_rate": 6.185307637021631e-07, + "loss": 0.0389, + "step": 6852, + "task_loss": 0.12259507924318314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993501658201012, + "compression_loss": 0.0, + "distillation_loss": 0.08173021674156189, + "epoch": 6.51, + "learning_rate": 6.161774619590666e-07, + "loss": 0.0784, + "step": 6853, + "task_loss": 0.048187751322984695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799351405172939, + "compression_loss": 0.0, + "distillation_loss": 0.10911855846643448, + "epoch": 6.51, + "learning_rate": 6.138285896547691e-07, + "loss": 0.1035, + "step": 6854, + "task_loss": 0.05309395492076874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993526429489923, + "compression_loss": 0.0, + "distillation_loss": 0.06665603071451187, + "epoch": 6.51, + "learning_rate": 6.114841472159516e-07, + "loss": 0.0703, + "step": 6855, + "task_loss": 0.10340512543916702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993538791492647, + "compression_loss": 0.0, + "distillation_loss": 0.013052813708782196, + "epoch": 6.51, + "learning_rate": 6.091441350684957e-07, + "loss": 0.0121, + "step": 6856, + "task_loss": 0.003964599221944809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993551137747599, + "compression_loss": 0.0, + "distillation_loss": 0.030018752440810204, + "epoch": 6.51, + "learning_rate": 6.068085536374752e-07, + "loss": 0.0292, + "step": 6857, + "task_loss": 0.02144923247396946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993563468264816, + "compression_loss": 0.0, + "distillation_loss": 0.020197410136461258, + "epoch": 6.51, + "learning_rate": 6.04477403347159e-07, + "loss": 0.0321, + "step": 6858, + "task_loss": 0.13970831036567688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993575783054334, + "compression_loss": 0.0, + "distillation_loss": 0.052353113889694214, + "epoch": 6.51, + "learning_rate": 6.02150684621014e-07, + "loss": 0.0531, + "step": 6859, + "task_loss": 0.059832729399204254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993588082126191, + "compression_loss": 0.0, + "distillation_loss": 0.04445452615618706, + "epoch": 6.51, + "learning_rate": 5.998283978816966e-07, + "loss": 0.0456, + "step": 6860, + "task_loss": 0.05596046522259712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993600365490422, + "compression_loss": 0.0, + "distillation_loss": 0.10008314251899719, + "epoch": 6.52, + "learning_rate": 5.975105435510637e-07, + "loss": 0.1065, + "step": 6861, + "task_loss": 0.1641978919506073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993612633157067, + "compression_loss": 0.0, + "distillation_loss": 0.02374127134680748, + "epoch": 6.52, + "learning_rate": 5.951971220501645e-07, + "loss": 0.0222, + "step": 6862, + "task_loss": 0.008045762777328491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993624885136159, + "compression_loss": 0.0, + "distillation_loss": 0.04164247214794159, + "epoch": 6.52, + "learning_rate": 5.928881337992437e-07, + "loss": 0.0465, + "step": 6863, + "task_loss": 0.08977600187063217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993637121437738, + "compression_loss": 0.0, + "distillation_loss": 0.06360233575105667, + "epoch": 6.52, + "learning_rate": 5.905835792177406e-07, + "loss": 0.088, + "step": 6864, + "task_loss": 0.30765628814697266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993649342071839, + "compression_loss": 0.0, + "distillation_loss": 0.03090558759868145, + "epoch": 6.52, + "learning_rate": 5.882834587242842e-07, + "loss": 0.043, + "step": 6865, + "task_loss": 0.15227064490318298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79936615470485, + "compression_loss": 0.0, + "distillation_loss": 0.02005811221897602, + "epoch": 6.52, + "learning_rate": 5.859877727367069e-07, + "loss": 0.0293, + "step": 6866, + "task_loss": 0.11207105964422226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993673736377757, + "compression_loss": 0.0, + "distillation_loss": 0.06416010856628418, + "epoch": 6.52, + "learning_rate": 5.836965216720309e-07, + "loss": 0.0746, + "step": 6867, + "task_loss": 0.16828285157680511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993685910069646, + "compression_loss": 0.0, + "distillation_loss": 0.09754236787557602, + "epoch": 6.52, + "learning_rate": 5.814097059464702e-07, + "loss": 0.0941, + "step": 6868, + "task_loss": 0.06287850439548492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993698068134205, + "compression_loss": 0.0, + "distillation_loss": 0.016108129173517227, + "epoch": 6.52, + "learning_rate": 5.79127325975437e-07, + "loss": 0.0156, + "step": 6869, + "task_loss": 0.010948818176984787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993710210581472, + "compression_loss": 0.0, + "distillation_loss": 0.04941096901893616, + "epoch": 6.52, + "learning_rate": 5.768493821735387e-07, + "loss": 0.0568, + "step": 6870, + "task_loss": 0.12317119538784027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799372233742148, + "compression_loss": 0.0, + "distillation_loss": 0.06292635947465897, + "epoch": 6.53, + "learning_rate": 5.745758749545749e-07, + "loss": 0.0747, + "step": 6871, + "task_loss": 0.18059583008289337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799373444866427, + "compression_loss": 0.0, + "distillation_loss": 0.028696933761239052, + "epoch": 6.53, + "learning_rate": 5.723068047315344e-07, + "loss": 0.0358, + "step": 6872, + "task_loss": 0.09973844140768051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993746544319877, + "compression_loss": 0.0, + "distillation_loss": 0.10228434950113297, + "epoch": 6.53, + "learning_rate": 5.7004217191661e-07, + "loss": 0.0967, + "step": 6873, + "task_loss": 0.04644036293029785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993758624398338, + "compression_loss": 0.0, + "distillation_loss": 0.05492546781897545, + "epoch": 6.53, + "learning_rate": 5.677819769211807e-07, + "loss": 0.0512, + "step": 6874, + "task_loss": 0.01718452386558056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799377068890969, + "compression_loss": 0.0, + "distillation_loss": 0.0156172476708889, + "epoch": 6.53, + "learning_rate": 5.655262201558209e-07, + "loss": 0.0151, + "step": 6875, + "task_loss": 0.010073995217680931 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993782737863969, + "compression_loss": 0.0, + "distillation_loss": 0.04025673121213913, + "epoch": 6.53, + "learning_rate": 5.63274902030303e-07, + "loss": 0.0425, + "step": 6876, + "task_loss": 0.06272687762975693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993794771271213, + "compression_loss": 0.0, + "distillation_loss": 0.11300121247768402, + "epoch": 6.53, + "learning_rate": 5.610280229535858e-07, + "loss": 0.1092, + "step": 6877, + "task_loss": 0.07530970871448517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993806789141458, + "compression_loss": 0.0, + "distillation_loss": 0.03029126301407814, + "epoch": 6.53, + "learning_rate": 5.58785583333829e-07, + "loss": 0.0353, + "step": 6878, + "task_loss": 0.08056721091270447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799381879148474, + "compression_loss": 0.0, + "distillation_loss": 0.02235785312950611, + "epoch": 6.53, + "learning_rate": 5.56547583578379e-07, + "loss": 0.0225, + "step": 6879, + "task_loss": 0.023482179269194603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993830778311097, + "compression_loss": 0.0, + "distillation_loss": 0.026940112933516502, + "epoch": 6.53, + "learning_rate": 5.543140240937827e-07, + "loss": 0.0271, + "step": 6880, + "task_loss": 0.02874702401459217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993842749630566, + "compression_loss": 0.0, + "distillation_loss": 0.03313375264406204, + "epoch": 6.53, + "learning_rate": 5.520849052857768e-07, + "loss": 0.0304, + "step": 6881, + "task_loss": 0.005804285407066345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993854705453185, + "compression_loss": 0.0, + "distillation_loss": 0.05707018822431564, + "epoch": 6.54, + "learning_rate": 5.498602275592873e-07, + "loss": 0.053, + "step": 6882, + "task_loss": 0.01648840121924877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993866645788987, + "compression_loss": 0.0, + "distillation_loss": 0.042155489325523376, + "epoch": 6.54, + "learning_rate": 5.476399913184438e-07, + "loss": 0.0563, + "step": 6883, + "task_loss": 0.1832885593175888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993878570648013, + "compression_loss": 0.0, + "distillation_loss": 0.01913582533597946, + "epoch": 6.54, + "learning_rate": 5.454241969665597e-07, + "loss": 0.0177, + "step": 6884, + "task_loss": 0.005137601867318153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993890480040297, + "compression_loss": 0.0, + "distillation_loss": 0.08212343603372574, + "epoch": 6.54, + "learning_rate": 5.432128449061464e-07, + "loss": 0.085, + "step": 6885, + "task_loss": 0.11137494444847107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993902373975876, + "compression_loss": 0.0, + "distillation_loss": 0.034957289695739746, + "epoch": 6.54, + "learning_rate": 5.410059355388964e-07, + "loss": 0.0336, + "step": 6886, + "task_loss": 0.020979827269911766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993914252464789, + "compression_loss": 0.0, + "distillation_loss": 0.026353981345891953, + "epoch": 6.54, + "learning_rate": 5.388034692657223e-07, + "loss": 0.0242, + "step": 6887, + "task_loss": 0.004370139911770821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799392611551707, + "compression_loss": 0.0, + "distillation_loss": 0.037230148911476135, + "epoch": 6.54, + "learning_rate": 5.366054464867016e-07, + "loss": 0.0342, + "step": 6888, + "task_loss": 0.006543133407831192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993937963142759, + "compression_loss": 0.0, + "distillation_loss": 0.019730228930711746, + "epoch": 6.54, + "learning_rate": 5.344118676011172e-07, + "loss": 0.0349, + "step": 6889, + "task_loss": 0.17101413011550903 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799394979535189, + "compression_loss": 0.0, + "distillation_loss": 0.026037897914648056, + "epoch": 6.54, + "learning_rate": 5.322227330074481e-07, + "loss": 0.0328, + "step": 6890, + "task_loss": 0.09383513033390045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993961612154501, + "compression_loss": 0.0, + "distillation_loss": 0.029655098915100098, + "epoch": 6.54, + "learning_rate": 5.300380431033564e-07, + "loss": 0.0276, + "step": 6891, + "task_loss": 0.009574664756655693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993973413560629, + "compression_loss": 0.0, + "distillation_loss": 0.06856879591941833, + "epoch": 6.55, + "learning_rate": 5.278577982857025e-07, + "loss": 0.0743, + "step": 6892, + "task_loss": 0.12565253674983978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799398519958031, + "compression_loss": 0.0, + "distillation_loss": 0.049631062895059586, + "epoch": 6.55, + "learning_rate": 5.25681998950539e-07, + "loss": 0.046, + "step": 6893, + "task_loss": 0.013276774436235428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7993996970223581, + "compression_loss": 0.0, + "distillation_loss": 0.05079222470521927, + "epoch": 6.55, + "learning_rate": 5.235106454931083e-07, + "loss": 0.0519, + "step": 6894, + "task_loss": 0.06189654394984245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799400872550048, + "compression_loss": 0.0, + "distillation_loss": 0.017339782789349556, + "epoch": 6.55, + "learning_rate": 5.213437383078501e-07, + "loss": 0.0241, + "step": 6895, + "task_loss": 0.08445850759744644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994020465421043, + "compression_loss": 0.0, + "distillation_loss": 0.08581778407096863, + "epoch": 6.55, + "learning_rate": 5.191812777883915e-07, + "loss": 0.1178, + "step": 6896, + "task_loss": 0.40567508339881897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994032189995306, + "compression_loss": 0.0, + "distillation_loss": 0.02632337063550949, + "epoch": 6.55, + "learning_rate": 5.170232643275541e-07, + "loss": 0.0352, + "step": 6897, + "task_loss": 0.11460480093955994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994043899233307, + "compression_loss": 0.0, + "distillation_loss": 0.09149836003780365, + "epoch": 6.55, + "learning_rate": 5.148696983173551e-07, + "loss": 0.0872, + "step": 6898, + "task_loss": 0.048190876841545105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994055593145083, + "compression_loss": 0.0, + "distillation_loss": 0.08621042221784592, + "epoch": 6.55, + "learning_rate": 5.127205801489954e-07, + "loss": 0.0784, + "step": 6899, + "task_loss": 0.0076434426009655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799406727174067, + "compression_loss": 0.0, + "distillation_loss": 0.03059300407767296, + "epoch": 6.55, + "learning_rate": 5.105759102128738e-07, + "loss": 0.0345, + "step": 6900, + "task_loss": 0.06991107016801834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994078935030104, + "compression_loss": 0.0, + "distillation_loss": 0.01730835810303688, + "epoch": 6.55, + "learning_rate": 5.084356888985814e-07, + "loss": 0.0217, + "step": 6901, + "task_loss": 0.06103728711605072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994090583023424, + "compression_loss": 0.0, + "distillation_loss": 0.03369038552045822, + "epoch": 6.55, + "learning_rate": 5.062999165948989e-07, + "loss": 0.04, + "step": 6902, + "task_loss": 0.09683724492788315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994102215730666, + "compression_loss": 0.0, + "distillation_loss": 0.057407524436712265, + "epoch": 6.56, + "learning_rate": 5.041685936897966e-07, + "loss": 0.0604, + "step": 6903, + "task_loss": 0.08716972172260284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994113833161866, + "compression_loss": 0.0, + "distillation_loss": 0.06570513546466827, + "epoch": 6.56, + "learning_rate": 5.020417205704453e-07, + "loss": 0.0642, + "step": 6904, + "task_loss": 0.0507512241601944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994125435327062, + "compression_loss": 0.0, + "distillation_loss": 0.017435505986213684, + "epoch": 6.56, + "learning_rate": 4.999192976231998e-07, + "loss": 0.026, + "step": 6905, + "task_loss": 0.10334107279777527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799413702223629, + "compression_loss": 0.0, + "distillation_loss": 0.0625772476196289, + "epoch": 6.56, + "learning_rate": 4.978013252336072e-07, + "loss": 0.0609, + "step": 6906, + "task_loss": 0.04585137218236923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994148593899586, + "compression_loss": 0.0, + "distillation_loss": 0.07100007683038712, + "epoch": 6.56, + "learning_rate": 4.956878037864043e-07, + "loss": 0.0801, + "step": 6907, + "task_loss": 0.1624176800251007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994160150326988, + "compression_loss": 0.0, + "distillation_loss": 0.0802350789308548, + "epoch": 6.56, + "learning_rate": 4.935787336655285e-07, + "loss": 0.0845, + "step": 6908, + "task_loss": 0.12317005544900894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994171691528533, + "compression_loss": 0.0, + "distillation_loss": 0.024071305990219116, + "epoch": 6.56, + "learning_rate": 4.914741152541008e-07, + "loss": 0.0222, + "step": 6909, + "task_loss": 0.005024924874305725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994183217514258, + "compression_loss": 0.0, + "distillation_loss": 0.10285592824220657, + "epoch": 6.56, + "learning_rate": 4.893739489344323e-07, + "loss": 0.1003, + "step": 6910, + "task_loss": 0.07742301374673843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994194728294198, + "compression_loss": 0.0, + "distillation_loss": 0.0873519778251648, + "epoch": 6.56, + "learning_rate": 4.872782350880317e-07, + "loss": 0.0973, + "step": 6911, + "task_loss": 0.18709777295589447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994206223878393, + "compression_loss": 0.0, + "distillation_loss": 0.02852596715092659, + "epoch": 6.56, + "learning_rate": 4.851869740955944e-07, + "loss": 0.0342, + "step": 6912, + "task_loss": 0.08523042500019073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994217704276876, + "compression_loss": 0.0, + "distillation_loss": 0.15731999278068542, + "epoch": 6.57, + "learning_rate": 4.831001663370083e-07, + "loss": 0.1699, + "step": 6913, + "task_loss": 0.2830032706260681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994229169499687, + "compression_loss": 0.0, + "distillation_loss": 0.04235370457172394, + "epoch": 6.57, + "learning_rate": 4.810178121913478e-07, + "loss": 0.0412, + "step": 6914, + "task_loss": 0.030333248898386955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994240619556862, + "compression_loss": 0.0, + "distillation_loss": 0.07360881567001343, + "epoch": 6.57, + "learning_rate": 4.78939912036891e-07, + "loss": 0.0884, + "step": 6915, + "task_loss": 0.22195890545845032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994252054458436, + "compression_loss": 0.0, + "distillation_loss": 0.017446476966142654, + "epoch": 6.57, + "learning_rate": 4.768664662510941e-07, + "loss": 0.016, + "step": 6916, + "task_loss": 0.0031855106353759766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994263474214448, + "compression_loss": 0.0, + "distillation_loss": 0.023602165281772614, + "epoch": 6.57, + "learning_rate": 4.7479747521060324e-07, + "loss": 0.0265, + "step": 6917, + "task_loss": 0.05219149589538574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994274878834934, + "compression_loss": 0.0, + "distillation_loss": 0.09219710528850555, + "epoch": 6.57, + "learning_rate": 4.727329392912705e-07, + "loss": 0.0911, + "step": 6918, + "task_loss": 0.08144529163837433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799428626832993, + "compression_loss": 0.0, + "distillation_loss": 0.08491960167884827, + "epoch": 6.57, + "learning_rate": 4.706728588681236e-07, + "loss": 0.0811, + "step": 6919, + "task_loss": 0.04681730642914772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994297642709475, + "compression_loss": 0.0, + "distillation_loss": 0.10116090625524521, + "epoch": 6.57, + "learning_rate": 4.6861723431538276e-07, + "loss": 0.0987, + "step": 6920, + "task_loss": 0.0767616331577301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994309001983604, + "compression_loss": 0.0, + "distillation_loss": 0.012731763534247875, + "epoch": 6.57, + "learning_rate": 4.665660660064686e-07, + "loss": 0.0123, + "step": 6921, + "task_loss": 0.008857103064656258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994320346162354, + "compression_loss": 0.0, + "distillation_loss": 0.02278589829802513, + "epoch": 6.57, + "learning_rate": 4.6451935431398306e-07, + "loss": 0.021, + "step": 6922, + "task_loss": 0.004962848499417305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994331675255762, + "compression_loss": 0.0, + "distillation_loss": 0.04454980418086052, + "epoch": 6.57, + "learning_rate": 4.6247709960972053e-07, + "loss": 0.0438, + "step": 6923, + "task_loss": 0.03726205229759216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994342989273865, + "compression_loss": 0.0, + "distillation_loss": 0.07946009188890457, + "epoch": 6.58, + "learning_rate": 4.604393022646647e-07, + "loss": 0.0817, + "step": 6924, + "task_loss": 0.10193922370672226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79943542882267, + "compression_loss": 0.0, + "distillation_loss": 0.03449847549200058, + "epoch": 6.58, + "learning_rate": 4.584059626489973e-07, + "loss": 0.033, + "step": 6925, + "task_loss": 0.01942656934261322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994365572124305, + "compression_loss": 0.0, + "distillation_loss": 0.027452457696199417, + "epoch": 6.58, + "learning_rate": 4.5637708113207834e-07, + "loss": 0.0329, + "step": 6926, + "task_loss": 0.08229565620422363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994376840976714, + "compression_loss": 0.0, + "distillation_loss": 0.014757486060261726, + "epoch": 6.58, + "learning_rate": 4.5435265808246585e-07, + "loss": 0.0137, + "step": 6927, + "task_loss": 0.0037736501544713974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994388094793965, + "compression_loss": 0.0, + "distillation_loss": 0.07772176712751389, + "epoch": 6.58, + "learning_rate": 4.5233269386790734e-07, + "loss": 0.0742, + "step": 6928, + "task_loss": 0.042066704481840134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994399333586095, + "compression_loss": 0.0, + "distillation_loss": 0.08422841131687164, + "epoch": 6.58, + "learning_rate": 4.5031718885533423e-07, + "loss": 0.083, + "step": 6929, + "task_loss": 0.07235755026340485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994410557363142, + "compression_loss": 0.0, + "distillation_loss": 0.04439088702201843, + "epoch": 6.58, + "learning_rate": 4.483061434108815e-07, + "loss": 0.0417, + "step": 6930, + "task_loss": 0.017775850370526314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799442176613514, + "compression_loss": 0.0, + "distillation_loss": 0.03447749838232994, + "epoch": 6.58, + "learning_rate": 4.462995578998569e-07, + "loss": 0.058, + "step": 6931, + "task_loss": 0.2692142724990845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994432959912129, + "compression_loss": 0.0, + "distillation_loss": 0.06817720085382462, + "epoch": 6.58, + "learning_rate": 4.4429743268676884e-07, + "loss": 0.0724, + "step": 6932, + "task_loss": 0.11086969822645187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994444138704144, + "compression_loss": 0.0, + "distillation_loss": 0.05889270454645157, + "epoch": 6.58, + "learning_rate": 4.4229976813531806e-07, + "loss": 0.057, + "step": 6933, + "task_loss": 0.040453821420669556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994455302521222, + "compression_loss": 0.0, + "distillation_loss": 0.02058131992816925, + "epoch": 6.58, + "learning_rate": 4.403065646083809e-07, + "loss": 0.019, + "step": 6934, + "task_loss": 0.00488920696079731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79944664513734, + "compression_loss": 0.0, + "distillation_loss": 0.03927480801939964, + "epoch": 6.59, + "learning_rate": 4.383178224680401e-07, + "loss": 0.0447, + "step": 6935, + "task_loss": 0.09392157196998596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994477585270715, + "compression_loss": 0.0, + "distillation_loss": 0.04294563829898834, + "epoch": 6.59, + "learning_rate": 4.3633354207555653e-07, + "loss": 0.0536, + "step": 6936, + "task_loss": 0.1493779420852661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994488704223204, + "compression_loss": 0.0, + "distillation_loss": 0.015475824475288391, + "epoch": 6.59, + "learning_rate": 4.3435372379138085e-07, + "loss": 0.0143, + "step": 6937, + "task_loss": 0.0033792592585086823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994499808240902, + "compression_loss": 0.0, + "distillation_loss": 0.017580023035407066, + "epoch": 6.59, + "learning_rate": 4.3237836797516417e-07, + "loss": 0.0163, + "step": 6938, + "task_loss": 0.004325071349740028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994510897333849, + "compression_loss": 0.0, + "distillation_loss": 0.024261541664600372, + "epoch": 6.59, + "learning_rate": 4.304074749857362e-07, + "loss": 0.0256, + "step": 6939, + "task_loss": 0.037375908344984055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799452197151208, + "compression_loss": 0.0, + "distillation_loss": 0.010520851239562035, + "epoch": 6.59, + "learning_rate": 4.284410451811188e-07, + "loss": 0.0194, + "step": 6940, + "task_loss": 0.09917841851711273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994533030785631, + "compression_loss": 0.0, + "distillation_loss": 0.04114697501063347, + "epoch": 6.59, + "learning_rate": 4.2647907891852357e-07, + "loss": 0.0494, + "step": 6941, + "task_loss": 0.12371751666069031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994544075164539, + "compression_loss": 0.0, + "distillation_loss": 0.02658040262758732, + "epoch": 6.59, + "learning_rate": 4.2452157655435145e-07, + "loss": 0.0263, + "step": 6942, + "task_loss": 0.023485524579882622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994555104658843, + "compression_loss": 0.0, + "distillation_loss": 0.01725972816348076, + "epoch": 6.59, + "learning_rate": 4.225685384441902e-07, + "loss": 0.0161, + "step": 6943, + "task_loss": 0.005400681868195534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994566119278578, + "compression_loss": 0.0, + "distillation_loss": 0.03645411878824234, + "epoch": 6.59, + "learning_rate": 4.2061996494282e-07, + "loss": 0.0406, + "step": 6944, + "task_loss": 0.07785525172948837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994577119033781, + "compression_loss": 0.0, + "distillation_loss": 0.0437319241464138, + "epoch": 6.6, + "learning_rate": 4.1867585640421036e-07, + "loss": 0.0513, + "step": 6945, + "task_loss": 0.11954252421855927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994588103934489, + "compression_loss": 0.0, + "distillation_loss": 0.040995851159095764, + "epoch": 6.6, + "learning_rate": 4.16736213181515e-07, + "loss": 0.0448, + "step": 6946, + "task_loss": 0.0789896547794342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994599073990739, + "compression_loss": 0.0, + "distillation_loss": 0.0845775455236435, + "epoch": 6.6, + "learning_rate": 4.148010356270826e-07, + "loss": 0.0876, + "step": 6947, + "task_loss": 0.11447571963071823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994610029212568, + "compression_loss": 0.0, + "distillation_loss": 0.019938606768846512, + "epoch": 6.6, + "learning_rate": 4.128703240924431e-07, + "loss": 0.0185, + "step": 6948, + "task_loss": 0.00549742765724659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994620969610012, + "compression_loss": 0.0, + "distillation_loss": 0.01663760282099247, + "epoch": 6.6, + "learning_rate": 4.109440789283242e-07, + "loss": 0.0323, + "step": 6949, + "task_loss": 0.17326664924621582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994631895193108, + "compression_loss": 0.0, + "distillation_loss": 0.049342524260282516, + "epoch": 6.6, + "learning_rate": 4.0902230048463495e-07, + "loss": 0.0508, + "step": 6950, + "task_loss": 0.0635058730840683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994642805971894, + "compression_loss": 0.0, + "distillation_loss": 0.06009483337402344, + "epoch": 6.6, + "learning_rate": 4.071049891104739e-07, + "loss": 0.0572, + "step": 6951, + "task_loss": 0.031034225597977638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994653701956406, + "compression_loss": 0.0, + "distillation_loss": 0.03527653217315674, + "epoch": 6.6, + "learning_rate": 4.0519214515413463e-07, + "loss": 0.0381, + "step": 6952, + "task_loss": 0.06350395828485489 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799466458315668, + "compression_loss": 0.0, + "distillation_loss": 0.056782066822052, + "epoch": 6.6, + "learning_rate": 4.0328376896309473e-07, + "loss": 0.0651, + "step": 6953, + "task_loss": 0.13970398902893066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994675449582754, + "compression_loss": 0.0, + "distillation_loss": 0.08646276593208313, + "epoch": 6.6, + "learning_rate": 4.01379860884013e-07, + "loss": 0.0818, + "step": 6954, + "task_loss": 0.040067270398139954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994686301244665, + "compression_loss": 0.0, + "distillation_loss": 0.08508466184139252, + "epoch": 6.6, + "learning_rate": 3.994804212627462e-07, + "loss": 0.0789, + "step": 6955, + "task_loss": 0.02337714470922947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994697138152448, + "compression_loss": 0.0, + "distillation_loss": 0.04727325588464737, + "epoch": 6.61, + "learning_rate": 3.975854504443433e-07, + "loss": 0.0431, + "step": 6956, + "task_loss": 0.005318284034729004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994707960316142, + "compression_loss": 0.0, + "distillation_loss": 0.025855958461761475, + "epoch": 6.61, + "learning_rate": 3.956949487730288e-07, + "loss": 0.0347, + "step": 6957, + "task_loss": 0.11433203518390656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994718767745782, + "compression_loss": 0.0, + "distillation_loss": 0.060321077704429626, + "epoch": 6.61, + "learning_rate": 3.9380891659221986e-07, + "loss": 0.0617, + "step": 6958, + "task_loss": 0.07363105565309525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994729560451407, + "compression_loss": 0.0, + "distillation_loss": 0.034057214856147766, + "epoch": 6.61, + "learning_rate": 3.9192735424452843e-07, + "loss": 0.0396, + "step": 6959, + "task_loss": 0.08952410519123077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994740338443052, + "compression_loss": 0.0, + "distillation_loss": 0.022792372852563858, + "epoch": 6.61, + "learning_rate": 3.900502620717478e-07, + "loss": 0.0222, + "step": 6960, + "task_loss": 0.017220191657543182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994751101730754, + "compression_loss": 0.0, + "distillation_loss": 0.02029343880712986, + "epoch": 6.61, + "learning_rate": 3.881776404148552e-07, + "loss": 0.0242, + "step": 6961, + "task_loss": 0.059222202748060226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799476185032455, + "compression_loss": 0.0, + "distillation_loss": 0.026080617681145668, + "epoch": 6.61, + "learning_rate": 3.8630948961403125e-07, + "loss": 0.0244, + "step": 6962, + "task_loss": 0.00916319526731968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994772584234476, + "compression_loss": 0.0, + "distillation_loss": 0.02705274149775505, + "epoch": 6.61, + "learning_rate": 3.844458100086268e-07, + "loss": 0.0329, + "step": 6963, + "task_loss": 0.08544865250587463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994783303470572, + "compression_loss": 0.0, + "distillation_loss": 0.02477836422622204, + "epoch": 6.61, + "learning_rate": 3.8258660193719044e-07, + "loss": 0.0374, + "step": 6964, + "task_loss": 0.15073972940444946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994794008042871, + "compression_loss": 0.0, + "distillation_loss": 0.022429198026657104, + "epoch": 6.61, + "learning_rate": 3.8073186573745757e-07, + "loss": 0.0208, + "step": 6965, + "task_loss": 0.006342481821775436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994804697961412, + "compression_loss": 0.0, + "distillation_loss": 0.08988398313522339, + "epoch": 6.62, + "learning_rate": 3.7888160174634757e-07, + "loss": 0.0898, + "step": 6966, + "task_loss": 0.08916927129030228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994815373236231, + "compression_loss": 0.0, + "distillation_loss": 0.028484515845775604, + "epoch": 6.62, + "learning_rate": 3.7703581029997215e-07, + "loss": 0.0278, + "step": 6967, + "task_loss": 0.022118881344795227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994826033877366, + "compression_loss": 0.0, + "distillation_loss": 0.0302952341735363, + "epoch": 6.62, + "learning_rate": 3.75194491733627e-07, + "loss": 0.028, + "step": 6968, + "task_loss": 0.0076552219688892365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994836679894852, + "compression_loss": 0.0, + "distillation_loss": 0.03270921856164932, + "epoch": 6.62, + "learning_rate": 3.733576463817973e-07, + "loss": 0.0308, + "step": 6969, + "task_loss": 0.013129375874996185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994847311298727, + "compression_loss": 0.0, + "distillation_loss": 0.052680712193250656, + "epoch": 6.62, + "learning_rate": 3.71525274578155e-07, + "loss": 0.0622, + "step": 6970, + "task_loss": 0.1481049656867981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994857928099028, + "compression_loss": 0.0, + "distillation_loss": 0.028156310319900513, + "epoch": 6.62, + "learning_rate": 3.696973766555589e-07, + "loss": 0.0263, + "step": 6971, + "task_loss": 0.009480351582169533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994868530305791, + "compression_loss": 0.0, + "distillation_loss": 0.036262333393096924, + "epoch": 6.62, + "learning_rate": 3.6787395294605455e-07, + "loss": 0.0339, + "step": 6972, + "task_loss": 0.012746721506118774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994879117929053, + "compression_loss": 0.0, + "distillation_loss": 0.08903949707746506, + "epoch": 6.62, + "learning_rate": 3.660550037808741e-07, + "loss": 0.1021, + "step": 6973, + "task_loss": 0.21985527873039246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994889690978851, + "compression_loss": 0.0, + "distillation_loss": 0.03600417822599411, + "epoch": 6.62, + "learning_rate": 3.642405294904422e-07, + "loss": 0.0337, + "step": 6974, + "task_loss": 0.01251133345067501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994900249465222, + "compression_loss": 0.0, + "distillation_loss": 0.02271505631506443, + "epoch": 6.62, + "learning_rate": 3.624305304043646e-07, + "loss": 0.0211, + "step": 6975, + "task_loss": 0.006729325279593468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994910793398202, + "compression_loss": 0.0, + "distillation_loss": 0.04386524111032486, + "epoch": 6.62, + "learning_rate": 3.606250068514394e-07, + "loss": 0.048, + "step": 6976, + "task_loss": 0.08489131927490234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799492132278783, + "compression_loss": 0.0, + "distillation_loss": 0.03540867939591408, + "epoch": 6.63, + "learning_rate": 3.5882395915964315e-07, + "loss": 0.0324, + "step": 6977, + "task_loss": 0.005433699116110802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799493183764414, + "compression_loss": 0.0, + "distillation_loss": 0.04147842526435852, + "epoch": 6.63, + "learning_rate": 3.570273876561475e-07, + "loss": 0.0387, + "step": 6978, + "task_loss": 0.013455674052238464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994942337977171, + "compression_loss": 0.0, + "distillation_loss": 0.043742306530475616, + "epoch": 6.63, + "learning_rate": 3.552352926673136e-07, + "loss": 0.0518, + "step": 6979, + "task_loss": 0.12435872107744217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994952823796958, + "compression_loss": 0.0, + "distillation_loss": 0.04596320539712906, + "epoch": 6.63, + "learning_rate": 3.5344767451867545e-07, + "loss": 0.0448, + "step": 6980, + "task_loss": 0.03420111536979675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799496329511354, + "compression_loss": 0.0, + "distillation_loss": 0.09754104167222977, + "epoch": 6.63, + "learning_rate": 3.51664533534965e-07, + "loss": 0.1004, + "step": 6981, + "task_loss": 0.1259613186120987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994973751936951, + "compression_loss": 0.0, + "distillation_loss": 0.014837587252259254, + "epoch": 6.63, + "learning_rate": 3.498858700401009e-07, + "loss": 0.0224, + "step": 6982, + "task_loss": 0.09078174829483032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799498419427723, + "compression_loss": 0.0, + "distillation_loss": 0.07896040380001068, + "epoch": 6.63, + "learning_rate": 3.481116843571858e-07, + "loss": 0.0744, + "step": 6983, + "task_loss": 0.03332065790891647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7994994622144413, + "compression_loss": 0.0, + "distillation_loss": 0.05292394757270813, + "epoch": 6.63, + "learning_rate": 3.463419768085091e-07, + "loss": 0.0551, + "step": 6984, + "task_loss": 0.07501198351383209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995005035548538, + "compression_loss": 0.0, + "distillation_loss": 0.019998980686068535, + "epoch": 6.63, + "learning_rate": 3.4457674771554425e-07, + "loss": 0.0186, + "step": 6985, + "task_loss": 0.005534190684556961 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799501543449964, + "compression_loss": 0.0, + "distillation_loss": 0.03253600001335144, + "epoch": 6.63, + "learning_rate": 3.428159973989542e-07, + "loss": 0.0317, + "step": 6986, + "task_loss": 0.024117592722177505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995025819007757, + "compression_loss": 0.0, + "distillation_loss": 0.02256142720580101, + "epoch": 6.64, + "learning_rate": 3.4105972617859136e-07, + "loss": 0.0233, + "step": 6987, + "task_loss": 0.029686935245990753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995036189082926, + "compression_loss": 0.0, + "distillation_loss": 0.02714642882347107, + "epoch": 6.64, + "learning_rate": 3.3930793437348675e-07, + "loss": 0.0258, + "step": 6988, + "task_loss": 0.013444697484374046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995046544735183, + "compression_loss": 0.0, + "distillation_loss": 0.08077394962310791, + "epoch": 6.64, + "learning_rate": 3.3756062230186067e-07, + "loss": 0.0878, + "step": 6989, + "task_loss": 0.15142491459846497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995056885974564, + "compression_loss": 0.0, + "distillation_loss": 0.10645799338817596, + "epoch": 6.64, + "learning_rate": 3.358177902811233e-07, + "loss": 0.1093, + "step": 6990, + "task_loss": 0.134621262550354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995067212811109, + "compression_loss": 0.0, + "distillation_loss": 0.020988423377275467, + "epoch": 6.64, + "learning_rate": 3.340794386278712e-07, + "loss": 0.0284, + "step": 6991, + "task_loss": 0.094853974878788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995077525254851, + "compression_loss": 0.0, + "distillation_loss": 0.05726141855120659, + "epoch": 6.64, + "learning_rate": 3.3234556765787963e-07, + "loss": 0.0633, + "step": 6992, + "task_loss": 0.1174088567495346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799508782331583, + "compression_loss": 0.0, + "distillation_loss": 0.05345267802476883, + "epoch": 6.64, + "learning_rate": 3.306161776861161e-07, + "loss": 0.0579, + "step": 6993, + "task_loss": 0.0983385294675827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799509810700408, + "compression_loss": 0.0, + "distillation_loss": 0.0895034670829773, + "epoch": 6.64, + "learning_rate": 3.288912690267348e-07, + "loss": 0.0974, + "step": 6994, + "task_loss": 0.16841556131839752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799510837632964, + "compression_loss": 0.0, + "distillation_loss": 0.020515451207756996, + "epoch": 6.64, + "learning_rate": 3.2717084199307134e-07, + "loss": 0.0191, + "step": 6995, + "task_loss": 0.006037186831235886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995118631302546, + "compression_loss": 0.0, + "distillation_loss": 0.02419309876859188, + "epoch": 6.64, + "learning_rate": 3.2545489689764784e-07, + "loss": 0.0414, + "step": 6996, + "task_loss": 0.19664430618286133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995128871932835, + "compression_loss": 0.0, + "distillation_loss": 0.0171881765127182, + "epoch": 6.64, + "learning_rate": 3.237434340521789e-07, + "loss": 0.0199, + "step": 6997, + "task_loss": 0.044096946716308594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995139098230544, + "compression_loss": 0.0, + "distillation_loss": 0.07256430387496948, + "epoch": 6.65, + "learning_rate": 3.220364537675574e-07, + "loss": 0.0694, + "step": 6998, + "task_loss": 0.04073936119675636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995149310205709, + "compression_loss": 0.0, + "distillation_loss": 0.07214680314064026, + "epoch": 6.65, + "learning_rate": 3.203339563538632e-07, + "loss": 0.067, + "step": 6999, + "task_loss": 0.021014513447880745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995159507868367, + "compression_loss": 0.0, + "distillation_loss": 0.03768681734800339, + "epoch": 6.65, + "learning_rate": 3.1863594212036274e-07, + "loss": 0.0393, + "step": 7000, + "task_loss": 0.053721100091934204 + }, + { + "epoch": 6.65, + "eval_accuracy": 0.8922018348623854, + "eval_loss": 0.4350743889808655, + "eval_runtime": 18.1267, + "eval_samples_per_second": 48.106, + "eval_steps_per_second": 6.013, + "step": 7000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995169691228556, + "compression_loss": 0.0, + "distillation_loss": 0.049745362251996994, + "epoch": 6.65, + "learning_rate": 3.1694241137551203e-07, + "loss": 0.0466, + "step": 7001, + "task_loss": 0.018015502020716667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995179860296312, + "compression_loss": 0.0, + "distillation_loss": 0.029364963993430138, + "epoch": 6.65, + "learning_rate": 3.1525336442694843e-07, + "loss": 0.0338, + "step": 7002, + "task_loss": 0.07397031784057617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995190015081671, + "compression_loss": 0.0, + "distillation_loss": 0.02187550999224186, + "epoch": 6.65, + "learning_rate": 3.1356880158149025e-07, + "loss": 0.0202, + "step": 7003, + "task_loss": 0.0046534184366464615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995200155594672, + "compression_loss": 0.0, + "distillation_loss": 0.013252021744847298, + "epoch": 6.65, + "learning_rate": 3.118887231451539e-07, + "loss": 0.0126, + "step": 7004, + "task_loss": 0.006522929295897484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995210281845349, + "compression_loss": 0.0, + "distillation_loss": 0.039753131568431854, + "epoch": 6.65, + "learning_rate": 3.1021312942313144e-07, + "loss": 0.0451, + "step": 7005, + "task_loss": 0.09337079524993896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799522039384374, + "compression_loss": 0.0, + "distillation_loss": 0.11256477236747742, + "epoch": 6.65, + "learning_rate": 3.0854202071979865e-07, + "loss": 0.1108, + "step": 7006, + "task_loss": 0.09483619034290314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995230491599884, + "compression_loss": 0.0, + "distillation_loss": 0.024704527109861374, + "epoch": 6.65, + "learning_rate": 3.0687539733872115e-07, + "loss": 0.0228, + "step": 7007, + "task_loss": 0.006121648475527763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995240575123814, + "compression_loss": 0.0, + "distillation_loss": 0.030473047867417336, + "epoch": 6.66, + "learning_rate": 3.05213259582654e-07, + "loss": 0.0289, + "step": 7008, + "task_loss": 0.015124401077628136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799525064442557, + "compression_loss": 0.0, + "distillation_loss": 0.021761227399110794, + "epoch": 6.66, + "learning_rate": 3.035556077535306e-07, + "loss": 0.0294, + "step": 7009, + "task_loss": 0.09853780269622803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995260699515188, + "compression_loss": 0.0, + "distillation_loss": 0.03172404319047928, + "epoch": 6.66, + "learning_rate": 3.0190244215246857e-07, + "loss": 0.0289, + "step": 7010, + "task_loss": 0.003925886005163193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995270740402703, + "compression_loss": 0.0, + "distillation_loss": 0.062315717339515686, + "epoch": 6.66, + "learning_rate": 3.002537630797747e-07, + "loss": 0.0584, + "step": 7011, + "task_loss": 0.02332870475947857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995280767098154, + "compression_loss": 0.0, + "distillation_loss": 0.03596599027514458, + "epoch": 6.66, + "learning_rate": 2.986095708349429e-07, + "loss": 0.034, + "step": 7012, + "task_loss": 0.01661401055753231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995290779611577, + "compression_loss": 0.0, + "distillation_loss": 0.03480362519621849, + "epoch": 6.66, + "learning_rate": 2.9696986571664253e-07, + "loss": 0.0327, + "step": 7013, + "task_loss": 0.013543521985411644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995300777953008, + "compression_loss": 0.0, + "distillation_loss": 0.024172749370336533, + "epoch": 6.66, + "learning_rate": 2.953346480227409e-07, + "loss": 0.0245, + "step": 7014, + "task_loss": 0.027264190837740898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995310762132486, + "compression_loss": 0.0, + "distillation_loss": 0.02965361624956131, + "epoch": 6.66, + "learning_rate": 2.937039180502782e-07, + "loss": 0.0448, + "step": 7015, + "task_loss": 0.18084561824798584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995320732160046, + "compression_loss": 0.0, + "distillation_loss": 0.0646897479891777, + "epoch": 6.66, + "learning_rate": 2.920776760954813e-07, + "loss": 0.0833, + "step": 7016, + "task_loss": 0.2510213255882263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995330688045724, + "compression_loss": 0.0, + "distillation_loss": 0.03607410565018654, + "epoch": 6.66, + "learning_rate": 2.904559224537723e-07, + "loss": 0.0336, + "step": 7017, + "task_loss": 0.011309439316391945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799534062979956, + "compression_loss": 0.0, + "distillation_loss": 0.0305813979357481, + "epoch": 6.66, + "learning_rate": 2.888386574197488e-07, + "loss": 0.0356, + "step": 7018, + "task_loss": 0.08079958707094193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995350557431589, + "compression_loss": 0.0, + "distillation_loss": 0.022271297872066498, + "epoch": 6.67, + "learning_rate": 2.872258812871925e-07, + "loss": 0.0463, + "step": 7019, + "task_loss": 0.26298779249191284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995360470951847, + "compression_loss": 0.0, + "distillation_loss": 0.03892994672060013, + "epoch": 6.67, + "learning_rate": 2.8561759434907185e-07, + "loss": 0.0363, + "step": 7020, + "task_loss": 0.012905815616250038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799537037037037, + "compression_loss": 0.0, + "distillation_loss": 0.02603466808795929, + "epoch": 6.67, + "learning_rate": 2.84013796897542e-07, + "loss": 0.0408, + "step": 7021, + "task_loss": 0.17337138950824738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995380255697199, + "compression_loss": 0.0, + "distillation_loss": 0.024577513337135315, + "epoch": 6.67, + "learning_rate": 2.8241448922393945e-07, + "loss": 0.0225, + "step": 7022, + "task_loss": 0.003398273140192032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995390126942367, + "compression_loss": 0.0, + "distillation_loss": 0.048275694251060486, + "epoch": 6.67, + "learning_rate": 2.8081967161878443e-07, + "loss": 0.053, + "step": 7023, + "task_loss": 0.09526211768388748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995399984115912, + "compression_loss": 0.0, + "distillation_loss": 0.029721971601247787, + "epoch": 6.67, + "learning_rate": 2.7922934437178695e-07, + "loss": 0.043, + "step": 7024, + "task_loss": 0.16234955191612244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995409827227872, + "compression_loss": 0.0, + "distillation_loss": 0.04240930825471878, + "epoch": 6.67, + "learning_rate": 2.7764350777183533e-07, + "loss": 0.0402, + "step": 7025, + "task_loss": 0.02065758965909481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995419656288282, + "compression_loss": 0.0, + "distillation_loss": 0.08198815584182739, + "epoch": 6.67, + "learning_rate": 2.760621621070047e-07, + "loss": 0.0865, + "step": 7026, + "task_loss": 0.126779243350029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995429471307178, + "compression_loss": 0.0, + "distillation_loss": 0.022945459932088852, + "epoch": 6.67, + "learning_rate": 2.744853076645515e-07, + "loss": 0.0216, + "step": 7027, + "task_loss": 0.009481718763709068 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79954392722946, + "compression_loss": 0.0, + "distillation_loss": 0.0208904929459095, + "epoch": 6.67, + "learning_rate": 2.729129447309242e-07, + "loss": 0.0194, + "step": 7028, + "task_loss": 0.006270444020628929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995449059260583, + "compression_loss": 0.0, + "distillation_loss": 0.05013927444815636, + "epoch": 6.68, + "learning_rate": 2.713450735917472e-07, + "loss": 0.0459, + "step": 7029, + "task_loss": 0.007837006822228432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995458832215164, + "compression_loss": 0.0, + "distillation_loss": 0.04295579344034195, + "epoch": 6.68, + "learning_rate": 2.6978169453183153e-07, + "loss": 0.0415, + "step": 7030, + "task_loss": 0.028465719893574715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799546859116838, + "compression_loss": 0.0, + "distillation_loss": 0.03028765879571438, + "epoch": 6.68, + "learning_rate": 2.6822280783517504e-07, + "loss": 0.0279, + "step": 7031, + "task_loss": 0.006683392450213432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995478336130267, + "compression_loss": 0.0, + "distillation_loss": 0.025525229051709175, + "epoch": 6.68, + "learning_rate": 2.66668413784954e-07, + "loss": 0.0238, + "step": 7032, + "task_loss": 0.008154524490237236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995488067110863, + "compression_loss": 0.0, + "distillation_loss": 0.020436950027942657, + "epoch": 6.68, + "learning_rate": 2.651185126635314e-07, + "loss": 0.0213, + "step": 7033, + "task_loss": 0.029449906200170517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995497784120204, + "compression_loss": 0.0, + "distillation_loss": 0.020888283848762512, + "epoch": 6.68, + "learning_rate": 2.635731047524542e-07, + "loss": 0.0276, + "step": 7034, + "task_loss": 0.08772215992212296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995507487168327, + "compression_loss": 0.0, + "distillation_loss": 0.057622358202934265, + "epoch": 6.68, + "learning_rate": 2.620321903324563e-07, + "loss": 0.0553, + "step": 7035, + "task_loss": 0.034089043736457825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995517176265269, + "compression_loss": 0.0, + "distillation_loss": 0.021918507292866707, + "epoch": 6.68, + "learning_rate": 2.6049576968345256e-07, + "loss": 0.0204, + "step": 7036, + "task_loss": 0.006641771644353867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995526851421066, + "compression_loss": 0.0, + "distillation_loss": 0.01303351204842329, + "epoch": 6.68, + "learning_rate": 2.589638430845337e-07, + "loss": 0.0124, + "step": 7037, + "task_loss": 0.00620032474398613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995536512645757, + "compression_loss": 0.0, + "distillation_loss": 0.04416292533278465, + "epoch": 6.68, + "learning_rate": 2.5743641081399094e-07, + "loss": 0.0467, + "step": 7038, + "task_loss": 0.0692160353064537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995546159949376, + "compression_loss": 0.0, + "distillation_loss": 0.044526465237140656, + "epoch": 6.68, + "learning_rate": 2.559134731492857e-07, + "loss": 0.0502, + "step": 7039, + "task_loss": 0.10125073045492172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995555793341962, + "compression_loss": 0.0, + "distillation_loss": 0.06591819226741791, + "epoch": 6.69, + "learning_rate": 2.5439503036706615e-07, + "loss": 0.0642, + "step": 7040, + "task_loss": 0.04842384532094002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995565412833551, + "compression_loss": 0.0, + "distillation_loss": 0.04874798282980919, + "epoch": 6.69, + "learning_rate": 2.5288108274316435e-07, + "loss": 0.0493, + "step": 7041, + "task_loss": 0.05393533781170845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799557501843418, + "compression_loss": 0.0, + "distillation_loss": 0.05359012633562088, + "epoch": 6.69, + "learning_rate": 2.5137163055259926e-07, + "loss": 0.0642, + "step": 7042, + "task_loss": 0.15967698395252228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995584610153885, + "compression_loss": 0.0, + "distillation_loss": 0.20667728781700134, + "epoch": 6.69, + "learning_rate": 2.4986667406956544e-07, + "loss": 0.2091, + "step": 7043, + "task_loss": 0.23065844178199768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995594188002704, + "compression_loss": 0.0, + "distillation_loss": 0.06716637313365936, + "epoch": 6.69, + "learning_rate": 2.4836621356744973e-07, + "loss": 0.0693, + "step": 7044, + "task_loss": 0.08879762887954712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995603751990673, + "compression_loss": 0.0, + "distillation_loss": 0.07034559547901154, + "epoch": 6.69, + "learning_rate": 2.468702493188174e-07, + "loss": 0.0752, + "step": 7045, + "task_loss": 0.1193457618355751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799561330212783, + "compression_loss": 0.0, + "distillation_loss": 0.02908926270902157, + "epoch": 6.69, + "learning_rate": 2.453787815954178e-07, + "loss": 0.0302, + "step": 7046, + "task_loss": 0.03969724103808403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995622838424209, + "compression_loss": 0.0, + "distillation_loss": 0.07951683551073074, + "epoch": 6.69, + "learning_rate": 2.4389181066817865e-07, + "loss": 0.0891, + "step": 7047, + "task_loss": 0.17507222294807434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799563236088985, + "compression_loss": 0.0, + "distillation_loss": 0.009979777969419956, + "epoch": 6.69, + "learning_rate": 2.4240933680722e-07, + "loss": 0.0095, + "step": 7048, + "task_loss": 0.004972290247678757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995641869534789, + "compression_loss": 0.0, + "distillation_loss": 0.014456565491855145, + "epoch": 6.69, + "learning_rate": 2.4093136028184024e-07, + "loss": 0.0133, + "step": 7049, + "task_loss": 0.003056548535823822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995651364369062, + "compression_loss": 0.0, + "distillation_loss": 0.06412947177886963, + "epoch": 6.7, + "learning_rate": 2.394578813605192e-07, + "loss": 0.0856, + "step": 7050, + "task_loss": 0.2784784734249115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995660845402706, + "compression_loss": 0.0, + "distillation_loss": 0.048369936645030975, + "epoch": 6.7, + "learning_rate": 2.3798890031092037e-07, + "loss": 0.0595, + "step": 7051, + "task_loss": 0.15925872325897217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995670312645757, + "compression_loss": 0.0, + "distillation_loss": 0.039365239441394806, + "epoch": 6.7, + "learning_rate": 2.3652441739989427e-07, + "loss": 0.0365, + "step": 7052, + "task_loss": 0.010832777246832848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995679766108255, + "compression_loss": 0.0, + "distillation_loss": 0.08087919652462006, + "epoch": 6.7, + "learning_rate": 2.3506443289347259e-07, + "loss": 0.0776, + "step": 7053, + "task_loss": 0.04851894453167915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995689205800234, + "compression_loss": 0.0, + "distillation_loss": 0.09845395386219025, + "epoch": 6.7, + "learning_rate": 2.3360894705686254e-07, + "loss": 0.1004, + "step": 7054, + "task_loss": 0.11757258325815201 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799569863173173, + "compression_loss": 0.0, + "distillation_loss": 0.020045258104801178, + "epoch": 6.7, + "learning_rate": 2.3215796015446378e-07, + "loss": 0.0187, + "step": 7055, + "task_loss": 0.006300348788499832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995708043912784, + "compression_loss": 0.0, + "distillation_loss": 0.06473547220230103, + "epoch": 6.7, + "learning_rate": 2.3071147244985713e-07, + "loss": 0.0787, + "step": 7056, + "task_loss": 0.2046293467283249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995717442353428, + "compression_loss": 0.0, + "distillation_loss": 0.08796077966690063, + "epoch": 6.7, + "learning_rate": 2.292694842057991e-07, + "loss": 0.1015, + "step": 7057, + "task_loss": 0.22374670207500458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995726827063702, + "compression_loss": 0.0, + "distillation_loss": 0.021141093224287033, + "epoch": 6.7, + "learning_rate": 2.278319956842384e-07, + "loss": 0.0362, + "step": 7058, + "task_loss": 0.17210184037685394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995736198053641, + "compression_loss": 0.0, + "distillation_loss": 0.03681035712361336, + "epoch": 6.7, + "learning_rate": 2.263990071462968e-07, + "loss": 0.0371, + "step": 7059, + "task_loss": 0.039506033062934875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995745555333283, + "compression_loss": 0.0, + "distillation_loss": 0.03608547896146774, + "epoch": 6.7, + "learning_rate": 2.2497051885228827e-07, + "loss": 0.0381, + "step": 7060, + "task_loss": 0.05643710866570473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995754898912665, + "compression_loss": 0.0, + "distillation_loss": 0.015874043107032776, + "epoch": 6.71, + "learning_rate": 2.2354653106170244e-07, + "loss": 0.015, + "step": 7061, + "task_loss": 0.006745463237166405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995764228801822, + "compression_loss": 0.0, + "distillation_loss": 0.03536041080951691, + "epoch": 6.71, + "learning_rate": 2.2212704403321572e-07, + "loss": 0.0436, + "step": 7062, + "task_loss": 0.11737397313117981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995773545010793, + "compression_loss": 0.0, + "distillation_loss": 0.20444419980049133, + "epoch": 6.71, + "learning_rate": 2.2071205802468299e-07, + "loss": 0.194, + "step": 7063, + "task_loss": 0.10004756599664688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995782847549614, + "compression_loss": 0.0, + "distillation_loss": 0.04743964970111847, + "epoch": 6.71, + "learning_rate": 2.1930157329314026e-07, + "loss": 0.0468, + "step": 7064, + "task_loss": 0.041466888040304184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995792136428321, + "compression_loss": 0.0, + "distillation_loss": 0.021672185510396957, + "epoch": 6.71, + "learning_rate": 2.178955900948132e-07, + "loss": 0.0216, + "step": 7065, + "task_loss": 0.0210067518055439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995801411656952, + "compression_loss": 0.0, + "distillation_loss": 0.04924159497022629, + "epoch": 6.71, + "learning_rate": 2.1649410868510577e-07, + "loss": 0.05, + "step": 7066, + "task_loss": 0.056573107838630676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995810673245544, + "compression_loss": 0.0, + "distillation_loss": 0.03908860683441162, + "epoch": 6.71, + "learning_rate": 2.1509712931860327e-07, + "loss": 0.0441, + "step": 7067, + "task_loss": 0.08947930485010147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995819921204131, + "compression_loss": 0.0, + "distillation_loss": 0.140781432390213, + "epoch": 6.71, + "learning_rate": 2.1370465224906655e-07, + "loss": 0.1488, + "step": 7068, + "task_loss": 0.2205652892589569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995829155542754, + "compression_loss": 0.0, + "distillation_loss": 0.06154758110642433, + "epoch": 6.71, + "learning_rate": 2.1231667772945718e-07, + "loss": 0.0578, + "step": 7069, + "task_loss": 0.02428608387708664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995838376271447, + "compression_loss": 0.0, + "distillation_loss": 0.08523669838905334, + "epoch": 6.71, + "learning_rate": 2.1093320601190124e-07, + "loss": 0.0834, + "step": 7070, + "task_loss": 0.06637626141309738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995847583400248, + "compression_loss": 0.0, + "distillation_loss": 0.058525826781988144, + "epoch": 6.72, + "learning_rate": 2.0955423734771162e-07, + "loss": 0.0756, + "step": 7071, + "task_loss": 0.2295752465724945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995856776939194, + "compression_loss": 0.0, + "distillation_loss": 0.03819255903363228, + "epoch": 6.72, + "learning_rate": 2.081797719873879e-07, + "loss": 0.0503, + "step": 7072, + "task_loss": 0.15918831527233124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799586595689832, + "compression_loss": 0.0, + "distillation_loss": 0.12596416473388672, + "epoch": 6.72, + "learning_rate": 2.0680981018060819e-07, + "loss": 0.1229, + "step": 7073, + "task_loss": 0.09560129046440125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995875123287665, + "compression_loss": 0.0, + "distillation_loss": 0.026970701292157173, + "epoch": 6.72, + "learning_rate": 2.054443521762317e-07, + "loss": 0.038, + "step": 7074, + "task_loss": 0.13767385482788086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995884276117264, + "compression_loss": 0.0, + "distillation_loss": 0.017990119755268097, + "epoch": 6.72, + "learning_rate": 2.040833982223017e-07, + "loss": 0.0198, + "step": 7075, + "task_loss": 0.036511633545160294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995893415397156, + "compression_loss": 0.0, + "distillation_loss": 0.040901899337768555, + "epoch": 6.72, + "learning_rate": 2.0272694856603991e-07, + "loss": 0.0423, + "step": 7076, + "task_loss": 0.05451485887169838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995902541137376, + "compression_loss": 0.0, + "distillation_loss": 0.04028903320431709, + "epoch": 6.72, + "learning_rate": 2.013750034538492e-07, + "loss": 0.0384, + "step": 7077, + "task_loss": 0.021871501579880714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995911653347961, + "compression_loss": 0.0, + "distillation_loss": 0.020981252193450928, + "epoch": 6.72, + "learning_rate": 2.0002756313132475e-07, + "loss": 0.0195, + "step": 7078, + "task_loss": 0.006563212722539902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995920752038949, + "compression_loss": 0.0, + "distillation_loss": 0.0342644639313221, + "epoch": 6.72, + "learning_rate": 1.9868462784322917e-07, + "loss": 0.036, + "step": 7079, + "task_loss": 0.05136824771761894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995929837220376, + "compression_loss": 0.0, + "distillation_loss": 0.1288357377052307, + "epoch": 6.72, + "learning_rate": 1.9734619783351727e-07, + "loss": 0.1282, + "step": 7080, + "task_loss": 0.12262624502182007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995938908902278, + "compression_loss": 0.0, + "distillation_loss": 0.07863984256982803, + "epoch": 6.72, + "learning_rate": 1.9601227334531956e-07, + "loss": 0.0845, + "step": 7081, + "task_loss": 0.13708055019378662 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995947967094693, + "compression_loss": 0.0, + "distillation_loss": 0.04373558238148689, + "epoch": 6.73, + "learning_rate": 1.946828546209478e-07, + "loss": 0.0465, + "step": 7082, + "task_loss": 0.07146552950143814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995957011807658, + "compression_loss": 0.0, + "distillation_loss": 0.07881873846054077, + "epoch": 6.73, + "learning_rate": 1.9335794190190327e-07, + "loss": 0.0787, + "step": 7083, + "task_loss": 0.0776781439781189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995966043051209, + "compression_loss": 0.0, + "distillation_loss": 0.023989124223589897, + "epoch": 6.73, + "learning_rate": 1.920375354288545e-07, + "loss": 0.0223, + "step": 7084, + "task_loss": 0.006650885567069054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995975060835383, + "compression_loss": 0.0, + "distillation_loss": 0.01962425746023655, + "epoch": 6.73, + "learning_rate": 1.9072163544166244e-07, + "loss": 0.0248, + "step": 7085, + "task_loss": 0.07158499956130981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995984065170217, + "compression_loss": 0.0, + "distillation_loss": 0.029367784038186073, + "epoch": 6.73, + "learning_rate": 1.89410242179372e-07, + "loss": 0.0391, + "step": 7086, + "task_loss": 0.12671338021755219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7995993056065748, + "compression_loss": 0.0, + "distillation_loss": 0.08097614347934723, + "epoch": 6.73, + "learning_rate": 1.881033558802009e-07, + "loss": 0.0883, + "step": 7087, + "task_loss": 0.15451526641845703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996002033532013, + "compression_loss": 0.0, + "distillation_loss": 0.09910960495471954, + "epoch": 6.73, + "learning_rate": 1.8680097678154817e-07, + "loss": 0.1072, + "step": 7088, + "task_loss": 0.17986391484737396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996010997579047, + "compression_loss": 0.0, + "distillation_loss": 0.03139527514576912, + "epoch": 6.73, + "learning_rate": 1.8550310511999958e-07, + "loss": 0.0354, + "step": 7089, + "task_loss": 0.0717248022556305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996019948216889, + "compression_loss": 0.0, + "distillation_loss": 0.020029647275805473, + "epoch": 6.73, + "learning_rate": 1.8420974113131927e-07, + "loss": 0.0187, + "step": 7090, + "task_loss": 0.006666556000709534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996028885455575, + "compression_loss": 0.0, + "distillation_loss": 0.03404942527413368, + "epoch": 6.73, + "learning_rate": 1.8292088505045546e-07, + "loss": 0.0398, + "step": 7091, + "task_loss": 0.09123058617115021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996037809305142, + "compression_loss": 0.0, + "distillation_loss": 0.03140270709991455, + "epoch": 6.74, + "learning_rate": 1.8163653711153194e-07, + "loss": 0.0426, + "step": 7092, + "task_loss": 0.14372628927230835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996046719775627, + "compression_loss": 0.0, + "distillation_loss": 0.027259420603513718, + "epoch": 6.74, + "learning_rate": 1.8035669754785934e-07, + "loss": 0.0421, + "step": 7093, + "task_loss": 0.17557671666145325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996055616877066, + "compression_loss": 0.0, + "distillation_loss": 0.023833533748984337, + "epoch": 6.74, + "learning_rate": 1.7908136659192387e-07, + "loss": 0.0223, + "step": 7094, + "task_loss": 0.00828717090189457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996064500619496, + "compression_loss": 0.0, + "distillation_loss": 0.11141105741262436, + "epoch": 6.74, + "learning_rate": 1.7781054447539858e-07, + "loss": 0.1124, + "step": 7095, + "task_loss": 0.12159767001867294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996073371012955, + "compression_loss": 0.0, + "distillation_loss": 0.0840909332036972, + "epoch": 6.74, + "learning_rate": 1.7654423142913213e-07, + "loss": 0.0868, + "step": 7096, + "task_loss": 0.1110580712556839 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996082228067479, + "compression_loss": 0.0, + "distillation_loss": 0.018214348703622818, + "epoch": 6.74, + "learning_rate": 1.7528242768315717e-07, + "loss": 0.0171, + "step": 7097, + "task_loss": 0.007498534396290779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996091071793104, + "compression_loss": 0.0, + "distillation_loss": 0.02279078960418701, + "epoch": 6.74, + "learning_rate": 1.7402513346668758e-07, + "loss": 0.021, + "step": 7098, + "task_loss": 0.0046605877578258514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996099902199868, + "compression_loss": 0.0, + "distillation_loss": 0.024694666266441345, + "epoch": 6.74, + "learning_rate": 1.727723490081129e-07, + "loss": 0.026, + "step": 7099, + "task_loss": 0.03818400576710701 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996108719297808, + "compression_loss": 0.0, + "distillation_loss": 0.04019855335354805, + "epoch": 6.74, + "learning_rate": 1.7152407453501219e-07, + "loss": 0.0374, + "step": 7100, + "task_loss": 0.012513427063822746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799611752309696, + "compression_loss": 0.0, + "distillation_loss": 0.028096789494156837, + "epoch": 6.74, + "learning_rate": 1.7028031027413737e-07, + "loss": 0.0337, + "step": 7101, + "task_loss": 0.08449839055538177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799612631360736, + "compression_loss": 0.0, + "distillation_loss": 0.05807451158761978, + "epoch": 6.74, + "learning_rate": 1.6904105645142444e-07, + "loss": 0.0659, + "step": 7102, + "task_loss": 0.13655823469161987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996135090839047, + "compression_loss": 0.0, + "distillation_loss": 0.05028388276696205, + "epoch": 6.75, + "learning_rate": 1.6780631329199326e-07, + "loss": 0.047, + "step": 7103, + "task_loss": 0.01732676476240158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996143854802057, + "compression_loss": 0.0, + "distillation_loss": 0.01434339303523302, + "epoch": 6.75, + "learning_rate": 1.6657608102013667e-07, + "loss": 0.0217, + "step": 7104, + "task_loss": 0.08795058727264404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996152605506426, + "compression_loss": 0.0, + "distillation_loss": 0.026891184970736504, + "epoch": 6.75, + "learning_rate": 1.65350359859337e-07, + "loss": 0.0254, + "step": 7105, + "task_loss": 0.011834867298603058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996161342962191, + "compression_loss": 0.0, + "distillation_loss": 0.058868955820798874, + "epoch": 6.75, + "learning_rate": 1.6412915003224672e-07, + "loss": 0.056, + "step": 7106, + "task_loss": 0.030042661353945732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799617006717939, + "compression_loss": 0.0, + "distillation_loss": 0.023804733529686928, + "epoch": 6.75, + "learning_rate": 1.629124517607078e-07, + "loss": 0.022, + "step": 7107, + "task_loss": 0.006073350086808205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996178778168059, + "compression_loss": 0.0, + "distillation_loss": 0.025645965710282326, + "epoch": 6.75, + "learning_rate": 1.6170026526574066e-07, + "loss": 0.0248, + "step": 7108, + "task_loss": 0.0175074003636837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996187475938235, + "compression_loss": 0.0, + "distillation_loss": 0.031395357102155685, + "epoch": 6.75, + "learning_rate": 1.6049259076753864e-07, + "loss": 0.0353, + "step": 7109, + "task_loss": 0.07032643258571625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996196160499953, + "compression_loss": 0.0, + "distillation_loss": 0.025923658162355423, + "epoch": 6.75, + "learning_rate": 1.592894284854901e-07, + "loss": 0.024, + "step": 7110, + "task_loss": 0.006603613495826721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996204831863253, + "compression_loss": 0.0, + "distillation_loss": 0.11871407926082611, + "epoch": 6.75, + "learning_rate": 1.5809077863814803e-07, + "loss": 0.117, + "step": 7111, + "task_loss": 0.10196692496538162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799621349003817, + "compression_loss": 0.0, + "distillation_loss": 0.06971892714500427, + "epoch": 6.75, + "learning_rate": 1.5689664144325766e-07, + "loss": 0.0639, + "step": 7112, + "task_loss": 0.011437831446528435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996222135034742, + "compression_loss": 0.0, + "distillation_loss": 0.0784948393702507, + "epoch": 6.75, + "learning_rate": 1.5570701711773717e-07, + "loss": 0.0759, + "step": 7113, + "task_loss": 0.05259266868233681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996230766863003, + "compression_loss": 0.0, + "distillation_loss": 0.03151058405637741, + "epoch": 6.76, + "learning_rate": 1.5452190587768867e-07, + "loss": 0.0486, + "step": 7114, + "task_loss": 0.20285047590732574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996239385532993, + "compression_loss": 0.0, + "distillation_loss": 0.09722236543893814, + "epoch": 6.76, + "learning_rate": 1.5334130793839275e-07, + "loss": 0.1164, + "step": 7115, + "task_loss": 0.2885318398475647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996247991054748, + "compression_loss": 0.0, + "distillation_loss": 0.07263641804456711, + "epoch": 6.76, + "learning_rate": 1.5216522351431118e-07, + "loss": 0.0823, + "step": 7116, + "task_loss": 0.16909703612327576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996256583438304, + "compression_loss": 0.0, + "distillation_loss": 0.04487297311425209, + "epoch": 6.76, + "learning_rate": 1.5099365281908694e-07, + "loss": 0.0531, + "step": 7117, + "task_loss": 0.12665140628814697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996265162693698, + "compression_loss": 0.0, + "distillation_loss": 0.013267583213746548, + "epoch": 6.76, + "learning_rate": 1.4982659606553872e-07, + "loss": 0.0206, + "step": 7118, + "task_loss": 0.08665129542350769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996273728830966, + "compression_loss": 0.0, + "distillation_loss": 0.012607241049408913, + "epoch": 6.76, + "learning_rate": 1.4866405346566914e-07, + "loss": 0.0118, + "step": 7119, + "task_loss": 0.004675716161727905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996282281860148, + "compression_loss": 0.0, + "distillation_loss": 0.06657108664512634, + "epoch": 6.76, + "learning_rate": 1.4750602523065648e-07, + "loss": 0.0707, + "step": 7120, + "task_loss": 0.10777979344129562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996290821791276, + "compression_loss": 0.0, + "distillation_loss": 0.028429970145225525, + "epoch": 6.76, + "learning_rate": 1.463525115708686e-07, + "loss": 0.0311, + "step": 7121, + "task_loss": 0.05511371046304703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996299348634391, + "compression_loss": 0.0, + "distillation_loss": 0.039049431681632996, + "epoch": 6.76, + "learning_rate": 1.4520351269584343e-07, + "loss": 0.0477, + "step": 7122, + "task_loss": 0.12538807094097137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996307862399529, + "compression_loss": 0.0, + "distillation_loss": 0.036447908729314804, + "epoch": 6.76, + "learning_rate": 1.4405902881430288e-07, + "loss": 0.0352, + "step": 7123, + "task_loss": 0.023651236668229103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996316363096726, + "compression_loss": 0.0, + "distillation_loss": 0.027506425976753235, + "epoch": 6.77, + "learning_rate": 1.4291906013414457e-07, + "loss": 0.0269, + "step": 7124, + "task_loss": 0.02168971858918667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996324850736019, + "compression_loss": 0.0, + "distillation_loss": 0.02820620872080326, + "epoch": 6.77, + "learning_rate": 1.417836068624556e-07, + "loss": 0.0268, + "step": 7125, + "task_loss": 0.014426644891500473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996333325327444, + "compression_loss": 0.0, + "distillation_loss": 0.04337786138057709, + "epoch": 6.77, + "learning_rate": 1.406526692054877e-07, + "loss": 0.0525, + "step": 7126, + "task_loss": 0.13505274057388306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996341786881038, + "compression_loss": 0.0, + "distillation_loss": 0.031544819474220276, + "epoch": 6.77, + "learning_rate": 1.3952624736869036e-07, + "loss": 0.0305, + "step": 7127, + "task_loss": 0.0206135306507349 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996350235406839, + "compression_loss": 0.0, + "distillation_loss": 0.027235832065343857, + "epoch": 6.77, + "learning_rate": 1.3840434155668047e-07, + "loss": 0.0423, + "step": 7128, + "task_loss": 0.17793741822242737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996358670914885, + "compression_loss": 0.0, + "distillation_loss": 0.026448873803019524, + "epoch": 6.77, + "learning_rate": 1.3728695197325336e-07, + "loss": 0.0328, + "step": 7129, + "task_loss": 0.08974019438028336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996367093415209, + "compression_loss": 0.0, + "distillation_loss": 0.03855022042989731, + "epoch": 6.77, + "learning_rate": 1.361740788213911e-07, + "loss": 0.0482, + "step": 7130, + "task_loss": 0.13530199229717255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996375502917851, + "compression_loss": 0.0, + "distillation_loss": 0.02457747608423233, + "epoch": 6.77, + "learning_rate": 1.3506572230325698e-07, + "loss": 0.0226, + "step": 7131, + "task_loss": 0.004712225869297981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996383899432846, + "compression_loss": 0.0, + "distillation_loss": 0.09410484880208969, + "epoch": 6.77, + "learning_rate": 1.3396188262018438e-07, + "loss": 0.1019, + "step": 7132, + "task_loss": 0.17175397276878357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996392282970232, + "compression_loss": 0.0, + "distillation_loss": 0.05483167618513107, + "epoch": 6.77, + "learning_rate": 1.3286255997268793e-07, + "loss": 0.0599, + "step": 7133, + "task_loss": 0.10567161440849304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996400653540044, + "compression_loss": 0.0, + "distillation_loss": 0.03218763321638107, + "epoch": 6.77, + "learning_rate": 1.3176775456047175e-07, + "loss": 0.0324, + "step": 7134, + "task_loss": 0.034330014139413834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996409011152322, + "compression_loss": 0.0, + "distillation_loss": 0.038693610578775406, + "epoch": 6.78, + "learning_rate": 1.3067746658241008e-07, + "loss": 0.0474, + "step": 7135, + "task_loss": 0.12591272592544556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79964173558171, + "compression_loss": 0.0, + "distillation_loss": 0.018780706450343132, + "epoch": 6.78, + "learning_rate": 1.2959169623655843e-07, + "loss": 0.0281, + "step": 7136, + "task_loss": 0.11212365329265594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996425687544417, + "compression_loss": 0.0, + "distillation_loss": 0.021541312336921692, + "epoch": 6.78, + "learning_rate": 1.2851044372015064e-07, + "loss": 0.0275, + "step": 7137, + "task_loss": 0.08120422065258026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996434006344307, + "compression_loss": 0.0, + "distillation_loss": 0.018733816221356392, + "epoch": 6.78, + "learning_rate": 1.2743370922960462e-07, + "loss": 0.0175, + "step": 7138, + "task_loss": 0.006464965641498566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799644231222681, + "compression_loss": 0.0, + "distillation_loss": 0.04339815676212311, + "epoch": 6.78, + "learning_rate": 1.263614929605139e-07, + "loss": 0.0406, + "step": 7139, + "task_loss": 0.015392309054732323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799645060520196, + "compression_loss": 0.0, + "distillation_loss": 0.01880636438727379, + "epoch": 6.78, + "learning_rate": 1.252937951076477e-07, + "loss": 0.0175, + "step": 7140, + "task_loss": 0.005763756111264229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996458885279796, + "compression_loss": 0.0, + "distillation_loss": 0.0439765602350235, + "epoch": 6.78, + "learning_rate": 1.2423061586496477e-07, + "loss": 0.0415, + "step": 7141, + "task_loss": 0.019159667193889618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996467152470353, + "compression_loss": 0.0, + "distillation_loss": 0.036824434995651245, + "epoch": 6.78, + "learning_rate": 1.231719554255939e-07, + "loss": 0.0418, + "step": 7142, + "task_loss": 0.08699777722358704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996475406783671, + "compression_loss": 0.0, + "distillation_loss": 0.014559917151927948, + "epoch": 6.78, + "learning_rate": 1.2211781398184242e-07, + "loss": 0.0138, + "step": 7143, + "task_loss": 0.0064619556069374084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996483648229782, + "compression_loss": 0.0, + "distillation_loss": 0.042142875492572784, + "epoch": 6.78, + "learning_rate": 1.2106819172520434e-07, + "loss": 0.0426, + "step": 7144, + "task_loss": 0.046216074377298355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996491876818727, + "compression_loss": 0.0, + "distillation_loss": 0.023237306624650955, + "epoch": 6.79, + "learning_rate": 1.200230888463466e-07, + "loss": 0.0304, + "step": 7145, + "task_loss": 0.0949011892080307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799650009256054, + "compression_loss": 0.0, + "distillation_loss": 0.018296122550964355, + "epoch": 6.79, + "learning_rate": 1.1898250553512014e-07, + "loss": 0.024, + "step": 7146, + "task_loss": 0.07504522800445557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799650829546526, + "compression_loss": 0.0, + "distillation_loss": 0.02411121129989624, + "epoch": 6.79, + "learning_rate": 1.1794644198054871e-07, + "loss": 0.0222, + "step": 7147, + "task_loss": 0.0048682671040296555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996516485542922, + "compression_loss": 0.0, + "distillation_loss": 0.07223325222730637, + "epoch": 6.79, + "learning_rate": 1.1691489837083735e-07, + "loss": 0.0702, + "step": 7148, + "task_loss": 0.052256107330322266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996524662803564, + "compression_loss": 0.0, + "distillation_loss": 0.0212749931961298, + "epoch": 6.79, + "learning_rate": 1.1588787489337505e-07, + "loss": 0.0325, + "step": 7149, + "task_loss": 0.13337501883506775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996532827257222, + "compression_loss": 0.0, + "distillation_loss": 0.014818870462477207, + "epoch": 6.79, + "learning_rate": 1.1486537173472367e-07, + "loss": 0.014, + "step": 7150, + "task_loss": 0.006277905777096748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996540978913934, + "compression_loss": 0.0, + "distillation_loss": 0.05914817005395889, + "epoch": 6.79, + "learning_rate": 1.1384738908062631e-07, + "loss": 0.0666, + "step": 7151, + "task_loss": 0.1340140402317047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996549117783737, + "compression_loss": 0.0, + "distillation_loss": 0.044574763625860214, + "epoch": 6.79, + "learning_rate": 1.1283392711600171e-07, + "loss": 0.0479, + "step": 7152, + "task_loss": 0.07772877812385559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996557243876665, + "compression_loss": 0.0, + "distillation_loss": 0.0406227707862854, + "epoch": 6.79, + "learning_rate": 1.1182498602495539e-07, + "loss": 0.0471, + "step": 7153, + "task_loss": 0.10571486502885818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996565357202758, + "compression_loss": 0.0, + "distillation_loss": 0.08192355930805206, + "epoch": 6.79, + "learning_rate": 1.1082056599076018e-07, + "loss": 0.0774, + "step": 7154, + "task_loss": 0.03656609356403351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996573457772052, + "compression_loss": 0.0, + "distillation_loss": 0.0265779010951519, + "epoch": 6.79, + "learning_rate": 1.0982066719587569e-07, + "loss": 0.0401, + "step": 7155, + "task_loss": 0.16203996539115906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996581545594582, + "compression_loss": 0.0, + "distillation_loss": 0.017745571210980415, + "epoch": 6.8, + "learning_rate": 1.0882528982194273e-07, + "loss": 0.0164, + "step": 7156, + "task_loss": 0.0038829054683446884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996589620680388, + "compression_loss": 0.0, + "distillation_loss": 0.022481059655547142, + "epoch": 6.8, + "learning_rate": 1.0783443404976946e-07, + "loss": 0.0208, + "step": 7157, + "task_loss": 0.005929671227931976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996597683039504, + "compression_loss": 0.0, + "distillation_loss": 0.02317318134009838, + "epoch": 6.8, + "learning_rate": 1.0684810005935631e-07, + "loss": 0.0214, + "step": 7158, + "task_loss": 0.005201876163482666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996605732681967, + "compression_loss": 0.0, + "distillation_loss": 0.05156789720058441, + "epoch": 6.8, + "learning_rate": 1.0586628802987108e-07, + "loss": 0.0496, + "step": 7159, + "task_loss": 0.03225575387477875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996613769617816, + "compression_loss": 0.0, + "distillation_loss": 0.02196839265525341, + "epoch": 6.8, + "learning_rate": 1.0488899813966835e-07, + "loss": 0.0267, + "step": 7160, + "task_loss": 0.06964989006519318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996621793857086, + "compression_loss": 0.0, + "distillation_loss": 0.06468576192855835, + "epoch": 6.8, + "learning_rate": 1.0391623056627275e-07, + "loss": 0.0656, + "step": 7161, + "task_loss": 0.07367479801177979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996629805409815, + "compression_loss": 0.0, + "distillation_loss": 0.04920611530542374, + "epoch": 6.8, + "learning_rate": 1.0294798548639573e-07, + "loss": 0.0528, + "step": 7162, + "task_loss": 0.08470079302787781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996637804286039, + "compression_loss": 0.0, + "distillation_loss": 0.032296232879161835, + "epoch": 6.8, + "learning_rate": 1.0198426307592157e-07, + "loss": 0.0307, + "step": 7163, + "task_loss": 0.01606859639286995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996645790495794, + "compression_loss": 0.0, + "distillation_loss": 0.04291418194770813, + "epoch": 6.8, + "learning_rate": 1.010250635099158e-07, + "loss": 0.0641, + "step": 7164, + "task_loss": 0.25430774688720703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996653764049119, + "compression_loss": 0.0, + "distillation_loss": 0.02546636387705803, + "epoch": 6.8, + "learning_rate": 1.0007038696262516e-07, + "loss": 0.0302, + "step": 7165, + "task_loss": 0.07231911271810532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996661724956049, + "compression_loss": 0.0, + "distillation_loss": 0.028939777985215187, + "epoch": 6.81, + "learning_rate": 9.912023360746647e-08, + "loss": 0.0268, + "step": 7166, + "task_loss": 0.007344096899032593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996669673226621, + "compression_loss": 0.0, + "distillation_loss": 0.11444491147994995, + "epoch": 6.81, + "learning_rate": 9.817460361704056e-08, + "loss": 0.1122, + "step": 7167, + "task_loss": 0.0920182541012764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996677608870872, + "compression_loss": 0.0, + "distillation_loss": 0.10061150789260864, + "epoch": 6.81, + "learning_rate": 9.723349716312668e-08, + "loss": 0.0963, + "step": 7168, + "task_loss": 0.05738703906536102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996685531898841, + "compression_loss": 0.0, + "distillation_loss": 0.08503048866987228, + "epoch": 6.81, + "learning_rate": 9.629691441667976e-08, + "loss": 0.093, + "step": 7169, + "task_loss": 0.1647735834121704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996693442320562, + "compression_loss": 0.0, + "distillation_loss": 0.028403718024492264, + "epoch": 6.81, + "learning_rate": 9.53648555478387e-08, + "loss": 0.0262, + "step": 7170, + "task_loss": 0.006750069558620453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996701340146072, + "compression_loss": 0.0, + "distillation_loss": 0.01994595304131508, + "epoch": 6.81, + "learning_rate": 9.443732072591249e-08, + "loss": 0.0188, + "step": 7171, + "task_loss": 0.008648447692394257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996709225385409, + "compression_loss": 0.0, + "distillation_loss": 0.06424511969089508, + "epoch": 6.81, + "learning_rate": 9.351431011939138e-08, + "loss": 0.0761, + "step": 7172, + "task_loss": 0.18302549421787262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799671709804861, + "compression_loss": 0.0, + "distillation_loss": 0.020225565880537033, + "epoch": 6.81, + "learning_rate": 9.259582389594956e-08, + "loss": 0.0185, + "step": 7173, + "task_loss": 0.002962857484817505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799672495814571, + "compression_loss": 0.0, + "distillation_loss": 0.07207255065441132, + "epoch": 6.81, + "learning_rate": 9.168186222243136e-08, + "loss": 0.0706, + "step": 7174, + "task_loss": 0.05724616348743439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996732805686748, + "compression_loss": 0.0, + "distillation_loss": 0.02577575296163559, + "epoch": 6.81, + "learning_rate": 9.077242526485952e-08, + "loss": 0.027, + "step": 7175, + "task_loss": 0.03761523589491844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799674064068176, + "compression_loss": 0.0, + "distillation_loss": 0.03172352910041809, + "epoch": 6.81, + "learning_rate": 8.986751318844355e-08, + "loss": 0.0399, + "step": 7176, + "task_loss": 0.11340309679508209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996748463140781, + "compression_loss": 0.0, + "distillation_loss": 0.03157733753323555, + "epoch": 6.82, + "learning_rate": 8.896712615756308e-08, + "loss": 0.0311, + "step": 7177, + "task_loss": 0.02710825763642788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996756273073852, + "compression_loss": 0.0, + "distillation_loss": 0.01715652272105217, + "epoch": 6.82, + "learning_rate": 8.807126433577617e-08, + "loss": 0.0165, + "step": 7178, + "task_loss": 0.010277284309267998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996764070491006, + "compression_loss": 0.0, + "distillation_loss": 0.03965865075588226, + "epoch": 6.82, + "learning_rate": 8.717992788582207e-08, + "loss": 0.0431, + "step": 7179, + "task_loss": 0.07392223924398422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799677185540228, + "compression_loss": 0.0, + "distillation_loss": 0.04710112512111664, + "epoch": 6.82, + "learning_rate": 8.629311696961295e-08, + "loss": 0.0486, + "step": 7180, + "task_loss": 0.06256051361560822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996779627817714, + "compression_loss": 0.0, + "distillation_loss": 0.04641801118850708, + "epoch": 6.82, + "learning_rate": 8.541083174824771e-08, + "loss": 0.0597, + "step": 7181, + "task_loss": 0.17936520278453827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996787387747342, + "compression_loss": 0.0, + "distillation_loss": 0.017239782959222794, + "epoch": 6.82, + "learning_rate": 8.453307238199259e-08, + "loss": 0.0159, + "step": 7182, + "task_loss": 0.003765510395169258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996795135201201, + "compression_loss": 0.0, + "distillation_loss": 0.13984356820583344, + "epoch": 6.82, + "learning_rate": 8.365983903030061e-08, + "loss": 0.139, + "step": 7183, + "task_loss": 0.13107913732528687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996802870189329, + "compression_loss": 0.0, + "distillation_loss": 0.07437920570373535, + "epoch": 6.82, + "learning_rate": 8.279113185179488e-08, + "loss": 0.0826, + "step": 7184, + "task_loss": 0.15642966330051422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996810592721761, + "compression_loss": 0.0, + "distillation_loss": 0.036259036511182785, + "epoch": 6.82, + "learning_rate": 8.192695100428527e-08, + "loss": 0.0332, + "step": 7185, + "task_loss": 0.005903052166104317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996818302808537, + "compression_loss": 0.0, + "distillation_loss": 0.018497422337532043, + "epoch": 6.82, + "learning_rate": 8.106729664475176e-08, + "loss": 0.0241, + "step": 7186, + "task_loss": 0.07415502518415451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799682600045969, + "compression_loss": 0.0, + "distillation_loss": 0.03047127276659012, + "epoch": 6.83, + "learning_rate": 8.021216892935279e-08, + "loss": 0.0313, + "step": 7187, + "task_loss": 0.038899097591638565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799683368568526, + "compression_loss": 0.0, + "distillation_loss": 0.053710635751485825, + "epoch": 6.83, + "learning_rate": 7.936156801342797e-08, + "loss": 0.0536, + "step": 7188, + "task_loss": 0.052580974996089935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996841358495282, + "compression_loss": 0.0, + "distillation_loss": 0.06118376553058624, + "epoch": 6.83, + "learning_rate": 7.851549405149539e-08, + "loss": 0.0601, + "step": 7189, + "task_loss": 0.050785936415195465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996849018899793, + "compression_loss": 0.0, + "distillation_loss": 0.02713852934539318, + "epoch": 6.83, + "learning_rate": 7.767394719724597e-08, + "loss": 0.0425, + "step": 7190, + "task_loss": 0.18106593191623688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996856666908831, + "compression_loss": 0.0, + "distillation_loss": 0.06980240345001221, + "epoch": 6.83, + "learning_rate": 7.683692760355187e-08, + "loss": 0.076, + "step": 7191, + "task_loss": 0.13159391283988953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996864302532432, + "compression_loss": 0.0, + "distillation_loss": 0.019683901220560074, + "epoch": 6.83, + "learning_rate": 7.600443542246094e-08, + "loss": 0.0184, + "step": 7192, + "task_loss": 0.00683322548866272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996871925780632, + "compression_loss": 0.0, + "distillation_loss": 0.02726883627474308, + "epoch": 6.83, + "learning_rate": 7.51764708051994e-08, + "loss": 0.0323, + "step": 7193, + "task_loss": 0.0772031769156456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996879536663469, + "compression_loss": 0.0, + "distillation_loss": 0.15833452343940735, + "epoch": 6.83, + "learning_rate": 7.435303390216919e-08, + "loss": 0.1523, + "step": 7194, + "task_loss": 0.09784331917762756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799688713519098, + "compression_loss": 0.0, + "distillation_loss": 0.02268981747329235, + "epoch": 6.83, + "learning_rate": 7.353412486295619e-08, + "loss": 0.0208, + "step": 7195, + "task_loss": 0.004109309986233711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79968947213732, + "compression_loss": 0.0, + "distillation_loss": 0.024118095636367798, + "epoch": 6.83, + "learning_rate": 7.271974383631642e-08, + "loss": 0.0263, + "step": 7196, + "task_loss": 0.04595687985420227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996902295220168, + "compression_loss": 0.0, + "distillation_loss": 0.028797656297683716, + "epoch": 6.83, + "learning_rate": 7.19098909701843e-08, + "loss": 0.0415, + "step": 7197, + "task_loss": 0.1560361236333847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799690985674192, + "compression_loss": 0.0, + "distillation_loss": 0.03179541975259781, + "epoch": 6.84, + "learning_rate": 7.110456641167829e-08, + "loss": 0.0305, + "step": 7198, + "task_loss": 0.018481941893696785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996917405948492, + "compression_loss": 0.0, + "distillation_loss": 0.01870710775256157, + "epoch": 6.84, + "learning_rate": 7.030377030708412e-08, + "loss": 0.0181, + "step": 7199, + "task_loss": 0.012378891929984093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996924942849922, + "compression_loss": 0.0, + "distillation_loss": 0.06044170260429382, + "epoch": 6.84, + "learning_rate": 6.950750280187435e-08, + "loss": 0.0777, + "step": 7200, + "task_loss": 0.23314093053340912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996932467456246, + "compression_loss": 0.0, + "distillation_loss": 0.05844536051154137, + "epoch": 6.84, + "learning_rate": 6.871576404069158e-08, + "loss": 0.0756, + "step": 7201, + "task_loss": 0.23015083372592926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996939979777502, + "compression_loss": 0.0, + "distillation_loss": 0.01944398134946823, + "epoch": 6.84, + "learning_rate": 6.792855416736243e-08, + "loss": 0.0264, + "step": 7202, + "task_loss": 0.08910975605249405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996947479823725, + "compression_loss": 0.0, + "distillation_loss": 0.024285856634378433, + "epoch": 6.84, + "learning_rate": 6.714587332488364e-08, + "loss": 0.0317, + "step": 7203, + "task_loss": 0.09848549962043762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996954967604953, + "compression_loss": 0.0, + "distillation_loss": 0.06020985543727875, + "epoch": 6.84, + "learning_rate": 6.63677216554387e-08, + "loss": 0.0604, + "step": 7204, + "task_loss": 0.062497399747371674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996962443131224, + "compression_loss": 0.0, + "distillation_loss": 0.03330262005329132, + "epoch": 6.84, + "learning_rate": 6.559409930037563e-08, + "loss": 0.0311, + "step": 7205, + "task_loss": 0.011487048119306564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996969906412571, + "compression_loss": 0.0, + "distillation_loss": 0.08118142187595367, + "epoch": 6.84, + "learning_rate": 6.482500640022926e-08, + "loss": 0.0848, + "step": 7206, + "task_loss": 0.11695171892642975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996977357459035, + "compression_loss": 0.0, + "distillation_loss": 0.030360179021954536, + "epoch": 6.84, + "learning_rate": 6.406044309471005e-08, + "loss": 0.0329, + "step": 7207, + "task_loss": 0.05534449219703674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996984796280651, + "compression_loss": 0.0, + "distillation_loss": 0.08223249763250351, + "epoch": 6.85, + "learning_rate": 6.406044309471005e-08, + "loss": 0.0893, + "step": 7208, + "task_loss": 0.15278904139995575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996992222887456, + "compression_loss": 0.0, + "distillation_loss": 0.022313468158245087, + "epoch": 6.85, + "learning_rate": 6.330040952270688e-08, + "loss": 0.0244, + "step": 7209, + "task_loss": 0.04346153512597084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7996999637289486, + "compression_loss": 0.0, + "distillation_loss": 0.0184345506131649, + "epoch": 6.85, + "learning_rate": 6.254490582227879e-08, + "loss": 0.017, + "step": 7210, + "task_loss": 0.004323702305555344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799700703949678, + "compression_loss": 0.0, + "distillation_loss": 0.020290520042181015, + "epoch": 6.85, + "learning_rate": 6.179393213066875e-08, + "loss": 0.0271, + "step": 7211, + "task_loss": 0.08879521489143372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997014429519371, + "compression_loss": 0.0, + "distillation_loss": 0.020489336922764778, + "epoch": 6.85, + "learning_rate": 6.104748858429266e-08, + "loss": 0.0298, + "step": 7212, + "task_loss": 0.11318296194076538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79970218073673, + "compression_loss": 0.0, + "distillation_loss": 0.05874666944146156, + "epoch": 6.85, + "learning_rate": 6.030557531875036e-08, + "loss": 0.0728, + "step": 7213, + "task_loss": 0.1989019811153412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997029173050602, + "compression_loss": 0.0, + "distillation_loss": 0.03400994837284088, + "epoch": 6.85, + "learning_rate": 5.956819246881185e-08, + "loss": 0.0417, + "step": 7214, + "task_loss": 0.11090053617954254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997036526579313, + "compression_loss": 0.0, + "distillation_loss": 0.02666945569217205, + "epoch": 6.85, + "learning_rate": 5.8835340168422734e-08, + "loss": 0.027, + "step": 7215, + "task_loss": 0.029919061809778214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997043867963471, + "compression_loss": 0.0, + "distillation_loss": 0.032640133053064346, + "epoch": 6.85, + "learning_rate": 5.8107018550712656e-08, + "loss": 0.0301, + "step": 7216, + "task_loss": 0.0070366039872169495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997051197213113, + "compression_loss": 0.0, + "distillation_loss": 0.02398163080215454, + "epoch": 6.85, + "learning_rate": 5.7383227747984105e-08, + "loss": 0.0315, + "step": 7217, + "task_loss": 0.09866875410079956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997058514338276, + "compression_loss": 0.0, + "distillation_loss": 0.01552260760217905, + "epoch": 6.85, + "learning_rate": 5.6663967891718015e-08, + "loss": 0.0146, + "step": 7218, + "task_loss": 0.006190884858369827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997065819348994, + "compression_loss": 0.0, + "distillation_loss": 0.06299366801977158, + "epoch": 6.86, + "learning_rate": 5.5949239112570997e-08, + "loss": 0.0748, + "step": 7219, + "task_loss": 0.18070363998413086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997073112255307, + "compression_loss": 0.0, + "distillation_loss": 0.056522756814956665, + "epoch": 6.86, + "learning_rate": 5.523904154037529e-08, + "loss": 0.0572, + "step": 7220, + "task_loss": 0.06307417154312134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997080393067251, + "compression_loss": 0.0, + "distillation_loss": 0.02577255666255951, + "epoch": 6.86, + "learning_rate": 5.453337530414437e-08, + "loss": 0.0269, + "step": 7221, + "task_loss": 0.03711457550525665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997087661794863, + "compression_loss": 0.0, + "distillation_loss": 0.024436548352241516, + "epoch": 6.86, + "learning_rate": 5.383224053206459e-08, + "loss": 0.0283, + "step": 7222, + "task_loss": 0.06333892792463303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997094918448179, + "compression_loss": 0.0, + "distillation_loss": 0.040110617876052856, + "epoch": 6.86, + "learning_rate": 5.313563735149796e-08, + "loss": 0.0419, + "step": 7223, + "task_loss": 0.057536765933036804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997102163037235, + "compression_loss": 0.0, + "distillation_loss": 0.026415549218654633, + "epoch": 6.86, + "learning_rate": 5.2443565888990466e-08, + "loss": 0.0339, + "step": 7224, + "task_loss": 0.10137321054935455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997109395572071, + "compression_loss": 0.0, + "distillation_loss": 0.05468286946415901, + "epoch": 6.86, + "learning_rate": 5.1756026270258215e-08, + "loss": 0.0534, + "step": 7225, + "task_loss": 0.0421706885099411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799711661606272, + "compression_loss": 0.0, + "distillation_loss": 0.04075568914413452, + "epoch": 6.86, + "learning_rate": 5.107301862019575e-08, + "loss": 0.0453, + "step": 7226, + "task_loss": 0.08584432303905487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997123824519222, + "compression_loss": 0.0, + "distillation_loss": 0.04388091713190079, + "epoch": 6.86, + "learning_rate": 5.0394543062873276e-08, + "loss": 0.053, + "step": 7227, + "task_loss": 0.13531340658664703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997131020951613, + "compression_loss": 0.0, + "distillation_loss": 0.03439214825630188, + "epoch": 6.86, + "learning_rate": 4.972059972154219e-08, + "loss": 0.0332, + "step": 7228, + "task_loss": 0.021979160606861115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997138205369929, + "compression_loss": 0.0, + "distillation_loss": 0.034759216010570526, + "epoch": 6.87, + "learning_rate": 4.905118871862402e-08, + "loss": 0.038, + "step": 7229, + "task_loss": 0.06708045303821564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997145377784206, + "compression_loss": 0.0, + "distillation_loss": 0.08172931522130966, + "epoch": 6.87, + "learning_rate": 4.838631017572426e-08, + "loss": 0.0904, + "step": 7230, + "task_loss": 0.16795513033866882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997152538204483, + "compression_loss": 0.0, + "distillation_loss": 0.027006901800632477, + "epoch": 6.87, + "learning_rate": 4.772596421361852e-08, + "loss": 0.0248, + "step": 7231, + "task_loss": 0.0044814664870500565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997159686640796, + "compression_loss": 0.0, + "distillation_loss": 0.053238674998283386, + "epoch": 6.87, + "learning_rate": 4.7070150952263634e-08, + "loss": 0.068, + "step": 7232, + "task_loss": 0.2006755918264389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997166823103181, + "compression_loss": 0.0, + "distillation_loss": 0.04943888634443283, + "epoch": 6.87, + "learning_rate": 4.64188705107893e-08, + "loss": 0.0456, + "step": 7233, + "task_loss": 0.011105941608548164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997173947601676, + "compression_loss": 0.0, + "distillation_loss": 0.025495588779449463, + "epoch": 6.87, + "learning_rate": 4.577212300750644e-08, + "loss": 0.0365, + "step": 7234, + "task_loss": 0.1354934424161911 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997181060146317, + "compression_loss": 0.0, + "distillation_loss": 0.021435732021927834, + "epoch": 6.87, + "learning_rate": 4.5129908559896075e-08, + "loss": 0.0301, + "step": 7235, + "task_loss": 0.10771320015192032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997188160747141, + "compression_loss": 0.0, + "distillation_loss": 0.03189963847398758, + "epoch": 6.87, + "learning_rate": 4.449222728462599e-08, + "loss": 0.0375, + "step": 7236, + "task_loss": 0.08780453354120255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997195249414185, + "compression_loss": 0.0, + "distillation_loss": 0.08029976487159729, + "epoch": 6.87, + "learning_rate": 4.3859079297525754e-08, + "loss": 0.0884, + "step": 7237, + "task_loss": 0.1608220934867859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997202326157485, + "compression_loss": 0.0, + "distillation_loss": 0.07394590228796005, + "epoch": 6.87, + "learning_rate": 4.323046471361447e-08, + "loss": 0.0696, + "step": 7238, + "task_loss": 0.0303100124001503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799720939098708, + "compression_loss": 0.0, + "distillation_loss": 0.08067163825035095, + "epoch": 6.87, + "learning_rate": 4.2606383647084134e-08, + "loss": 0.0771, + "step": 7239, + "task_loss": 0.04478314146399498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997216443913004, + "compression_loss": 0.0, + "distillation_loss": 0.0285223089158535, + "epoch": 6.88, + "learning_rate": 4.198683621129962e-08, + "loss": 0.0336, + "step": 7240, + "task_loss": 0.07915632426738739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997223484945296, + "compression_loss": 0.0, + "distillation_loss": 0.02331477962434292, + "epoch": 6.88, + "learning_rate": 4.1371822518804224e-08, + "loss": 0.0306, + "step": 7241, + "task_loss": 0.09588571637868881 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997230514093993, + "compression_loss": 0.0, + "distillation_loss": 0.07636144757270813, + "epoch": 6.88, + "learning_rate": 4.0761342681319706e-08, + "loss": 0.0866, + "step": 7242, + "task_loss": 0.1786966323852539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997237531369129, + "compression_loss": 0.0, + "distillation_loss": 0.08760742098093033, + "epoch": 6.88, + "learning_rate": 4.015539680974345e-08, + "loss": 0.0921, + "step": 7243, + "task_loss": 0.13226494193077087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997244536780743, + "compression_loss": 0.0, + "distillation_loss": 0.07857417315244675, + "epoch": 6.88, + "learning_rate": 3.955398501414576e-08, + "loss": 0.0795, + "step": 7244, + "task_loss": 0.08820837736129761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997251530338872, + "compression_loss": 0.0, + "distillation_loss": 0.024933654814958572, + "epoch": 6.88, + "learning_rate": 3.895710740378089e-08, + "loss": 0.0264, + "step": 7245, + "task_loss": 0.03946792706847191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997258512053552, + "compression_loss": 0.0, + "distillation_loss": 0.0409945584833622, + "epoch": 6.88, + "learning_rate": 3.8364764087067685e-08, + "loss": 0.041, + "step": 7246, + "task_loss": 0.04090442508459091 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799726548193482, + "compression_loss": 0.0, + "distillation_loss": 0.1333804726600647, + "epoch": 6.88, + "learning_rate": 3.777695517161173e-08, + "loss": 0.1276, + "step": 7247, + "task_loss": 0.07556381076574326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997272439992713, + "compression_loss": 0.0, + "distillation_loss": 0.03527265042066574, + "epoch": 6.88, + "learning_rate": 3.7193680764191496e-08, + "loss": 0.0327, + "step": 7248, + "task_loss": 0.00909445621073246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997279386237268, + "compression_loss": 0.0, + "distillation_loss": 0.0798153355717659, + "epoch": 6.88, + "learning_rate": 3.66149409707639e-08, + "loss": 0.0749, + "step": 7249, + "task_loss": 0.031093858182430267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997286320678522, + "compression_loss": 0.0, + "distillation_loss": 0.04000500962138176, + "epoch": 6.89, + "learning_rate": 3.604073589645596e-08, + "loss": 0.0366, + "step": 7250, + "task_loss": 0.00630572997033596 + }, + { + "epoch": 6.89, + "eval_accuracy": 0.8956422018348624, + "eval_loss": 0.4339929223060608, + "eval_runtime": 18.2364, + "eval_samples_per_second": 47.816, + "eval_steps_per_second": 5.977, + "step": 7250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997293243326511, + "compression_loss": 0.0, + "distillation_loss": 0.024513155221939087, + "epoch": 6.89, + "learning_rate": 3.547106564557312e-08, + "loss": 0.0265, + "step": 7251, + "task_loss": 0.0446399450302124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997300154191272, + "compression_loss": 0.0, + "distillation_loss": 0.02782837674021721, + "epoch": 6.89, + "learning_rate": 3.490593032160483e-08, + "loss": 0.0319, + "step": 7252, + "task_loss": 0.06867832690477371 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997307053282843, + "compression_loss": 0.0, + "distillation_loss": 0.02256028726696968, + "epoch": 6.89, + "learning_rate": 3.4345330027207856e-08, + "loss": 0.0274, + "step": 7253, + "task_loss": 0.07064933329820633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997313940611258, + "compression_loss": 0.0, + "distillation_loss": 0.047051578760147095, + "epoch": 6.89, + "learning_rate": 3.378926486421463e-08, + "loss": 0.061, + "step": 7254, + "task_loss": 0.18665584921836853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997320816186557, + "compression_loss": 0.0, + "distillation_loss": 0.016445623710751534, + "epoch": 6.89, + "learning_rate": 3.3237734933641574e-08, + "loss": 0.0218, + "step": 7255, + "task_loss": 0.07008873671293259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997327680018775, + "compression_loss": 0.0, + "distillation_loss": 0.03820062056183815, + "epoch": 6.89, + "learning_rate": 3.269074033567798e-08, + "loss": 0.0358, + "step": 7256, + "task_loss": 0.014143818989396095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799733453211795, + "compression_loss": 0.0, + "distillation_loss": 0.03633254021406174, + "epoch": 6.89, + "learning_rate": 3.21482811696805e-08, + "loss": 0.0364, + "step": 7257, + "task_loss": 0.036846742033958435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997341372494118, + "compression_loss": 0.0, + "distillation_loss": 0.03856148198246956, + "epoch": 6.89, + "learning_rate": 3.161035753419805e-08, + "loss": 0.0463, + "step": 7258, + "task_loss": 0.11632915586233139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997348201157316, + "compression_loss": 0.0, + "distillation_loss": 0.018683161586523056, + "epoch": 6.89, + "learning_rate": 3.107696952694139e-08, + "loss": 0.0372, + "step": 7259, + "task_loss": 0.20371021330356598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799735501811758, + "compression_loss": 0.0, + "distillation_loss": 0.018803803250193596, + "epoch": 6.89, + "learning_rate": 3.054811724480522e-08, + "loss": 0.0181, + "step": 7260, + "task_loss": 0.0114500243216753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997361823384949, + "compression_loss": 0.0, + "distillation_loss": 0.014816414564847946, + "epoch": 6.9, + "learning_rate": 3.0023800783857134e-08, + "loss": 0.0141, + "step": 7261, + "task_loss": 0.0073722414672374725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997368616969457, + "compression_loss": 0.0, + "distillation_loss": 0.020906083285808563, + "epoch": 6.9, + "learning_rate": 2.950402023934318e-08, + "loss": 0.0235, + "step": 7262, + "task_loss": 0.046787478029727936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997375398881144, + "compression_loss": 0.0, + "distillation_loss": 0.045058127492666245, + "epoch": 6.9, + "learning_rate": 2.898877570568226e-08, + "loss": 0.0595, + "step": 7263, + "task_loss": 0.1898675560951233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997382169130043, + "compression_loss": 0.0, + "distillation_loss": 0.050636500120162964, + "epoch": 6.9, + "learning_rate": 2.8478067276471733e-08, + "loss": 0.0479, + "step": 7264, + "task_loss": 0.02349741943180561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997388927726194, + "compression_loss": 0.0, + "distillation_loss": 0.049480557441711426, + "epoch": 6.9, + "learning_rate": 2.7971895044487385e-08, + "loss": 0.0677, + "step": 7265, + "task_loss": 0.23184102773666382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997395674679633, + "compression_loss": 0.0, + "distillation_loss": 0.10401052981615067, + "epoch": 6.9, + "learning_rate": 2.7470259101672336e-08, + "loss": 0.0976, + "step": 7266, + "task_loss": 0.04024471715092659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997402410000396, + "compression_loss": 0.0, + "distillation_loss": 0.018137693405151367, + "epoch": 6.9, + "learning_rate": 2.6973159539153693e-08, + "loss": 0.0169, + "step": 7267, + "task_loss": 0.006021425127983093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799740913369852, + "compression_loss": 0.0, + "distillation_loss": 0.024599701166152954, + "epoch": 6.9, + "learning_rate": 2.648059644723144e-08, + "loss": 0.0234, + "step": 7268, + "task_loss": 0.012996546924114227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997415845784043, + "compression_loss": 0.0, + "distillation_loss": 0.030850766226649284, + "epoch": 6.9, + "learning_rate": 2.5992569915384014e-08, + "loss": 0.051, + "step": 7269, + "task_loss": 0.2318606823682785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997422546267, + "compression_loss": 0.0, + "distillation_loss": 0.16657190024852753, + "epoch": 6.9, + "learning_rate": 2.550908003226271e-08, + "loss": 0.1601, + "step": 7270, + "task_loss": 0.10178233683109283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997429235157429, + "compression_loss": 0.0, + "distillation_loss": 0.031046094372868538, + "epoch": 6.91, + "learning_rate": 2.5030126885694506e-08, + "loss": 0.0305, + "step": 7271, + "task_loss": 0.02560516819357872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997435912465368, + "compression_loss": 0.0, + "distillation_loss": 0.018669994547963142, + "epoch": 6.91, + "learning_rate": 2.4555710562684796e-08, + "loss": 0.0276, + "step": 7272, + "task_loss": 0.10760626196861267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997442578200852, + "compression_loss": 0.0, + "distillation_loss": 0.04650671035051346, + "epoch": 6.91, + "learning_rate": 2.408583114941465e-08, + "loss": 0.0566, + "step": 7273, + "task_loss": 0.14748124778270721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997449232373918, + "compression_loss": 0.0, + "distillation_loss": 0.0697929635643959, + "epoch": 6.91, + "learning_rate": 2.3620488731235234e-08, + "loss": 0.0814, + "step": 7274, + "task_loss": 0.18626686930656433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997455874994602, + "compression_loss": 0.0, + "distillation_loss": 0.03916598856449127, + "epoch": 6.91, + "learning_rate": 2.3159683392684483e-08, + "loss": 0.0364, + "step": 7275, + "task_loss": 0.011343579739332199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997462506072943, + "compression_loss": 0.0, + "distillation_loss": 0.05003172531723976, + "epoch": 6.91, + "learning_rate": 2.2703415217464885e-08, + "loss": 0.0553, + "step": 7276, + "task_loss": 0.10285012423992157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997469125618977, + "compression_loss": 0.0, + "distillation_loss": 0.05633166432380676, + "epoch": 6.91, + "learning_rate": 2.2251684288462915e-08, + "loss": 0.0563, + "step": 7277, + "task_loss": 0.055823661386966705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799747573364274, + "compression_loss": 0.0, + "distillation_loss": 0.02220912277698517, + "epoch": 6.91, + "learning_rate": 2.180449068773793e-08, + "loss": 0.0213, + "step": 7278, + "task_loss": 0.013472869992256165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799748233015427, + "compression_loss": 0.0, + "distillation_loss": 0.03192361444234848, + "epoch": 6.91, + "learning_rate": 2.136183449652218e-08, + "loss": 0.0313, + "step": 7279, + "task_loss": 0.025719383731484413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997488915163603, + "compression_loss": 0.0, + "distillation_loss": 0.08539696782827377, + "epoch": 6.91, + "learning_rate": 2.0923715795229115e-08, + "loss": 0.0879, + "step": 7280, + "task_loss": 0.11043383926153183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997495488680776, + "compression_loss": 0.0, + "distillation_loss": 0.050244055688381195, + "epoch": 6.91, + "learning_rate": 2.0490134663442295e-08, + "loss": 0.0595, + "step": 7281, + "task_loss": 0.14261166751384735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997502050715826, + "compression_loss": 0.0, + "distillation_loss": 0.020430155098438263, + "epoch": 6.92, + "learning_rate": 2.006109117992372e-08, + "loss": 0.0187, + "step": 7282, + "task_loss": 0.003576911985874176 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997508601278789, + "compression_loss": 0.0, + "distillation_loss": 0.07387155294418335, + "epoch": 6.92, + "learning_rate": 1.9636585422616593e-08, + "loss": 0.0831, + "step": 7283, + "task_loss": 0.16649088263511658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997515140379703, + "compression_loss": 0.0, + "distillation_loss": 0.024126257747411728, + "epoch": 6.92, + "learning_rate": 1.9216617468625908e-08, + "loss": 0.0239, + "step": 7284, + "task_loss": 0.022267458960413933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997521668028604, + "compression_loss": 0.0, + "distillation_loss": 0.1891712099313736, + "epoch": 6.92, + "learning_rate": 1.8801187394248965e-08, + "loss": 0.1828, + "step": 7285, + "task_loss": 0.12499140202999115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799752818423553, + "compression_loss": 0.0, + "distillation_loss": 0.02490517869591713, + "epoch": 6.92, + "learning_rate": 1.8390295274944846e-08, + "loss": 0.0272, + "step": 7286, + "task_loss": 0.048274118453264236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997534689010516, + "compression_loss": 0.0, + "distillation_loss": 0.031054774299263954, + "epoch": 6.92, + "learning_rate": 1.7983941185356622e-08, + "loss": 0.029, + "step": 7287, + "task_loss": 0.010997863486409187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.79975411823636, + "compression_loss": 0.0, + "distillation_loss": 0.02562297135591507, + "epoch": 6.92, + "learning_rate": 1.7582125199303023e-08, + "loss": 0.0392, + "step": 7288, + "task_loss": 0.16150373220443726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997547664304819, + "compression_loss": 0.0, + "distillation_loss": 0.06376007199287415, + "epoch": 6.92, + "learning_rate": 1.7184847389770108e-08, + "loss": 0.0598, + "step": 7289, + "task_loss": 0.02367803268134594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997554134844209, + "compression_loss": 0.0, + "distillation_loss": 0.014210294932126999, + "epoch": 6.92, + "learning_rate": 1.679210782892793e-08, + "loss": 0.0297, + "step": 7290, + "task_loss": 0.1686396598815918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997560593991807, + "compression_loss": 0.0, + "distillation_loss": 0.02178233675658703, + "epoch": 6.92, + "learning_rate": 1.6403906588122185e-08, + "loss": 0.0201, + "step": 7291, + "task_loss": 0.005438664928078651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799756704175765, + "compression_loss": 0.0, + "distillation_loss": 0.055342864245176315, + "epoch": 6.92, + "learning_rate": 1.6020243737865927e-08, + "loss": 0.0638, + "step": 7292, + "task_loss": 0.1397797167301178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997573478151776, + "compression_loss": 0.0, + "distillation_loss": 0.025284867733716965, + "epoch": 6.93, + "learning_rate": 1.5641119347858947e-08, + "loss": 0.024, + "step": 7293, + "task_loss": 0.012278718873858452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799757990318422, + "compression_loss": 0.0, + "distillation_loss": 0.013603068888187408, + "epoch": 6.93, + "learning_rate": 1.526653348696838e-08, + "loss": 0.0152, + "step": 7294, + "task_loss": 0.029902005568146706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799758631686502, + "compression_loss": 0.0, + "distillation_loss": 0.042981285601854324, + "epoch": 6.93, + "learning_rate": 1.4896486223239802e-08, + "loss": 0.0405, + "step": 7295, + "task_loss": 0.0184940155595541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997592719204212, + "compression_loss": 0.0, + "distillation_loss": 0.02033732831478119, + "epoch": 6.93, + "learning_rate": 1.4530977623891662e-08, + "loss": 0.0188, + "step": 7296, + "task_loss": 0.005222789943218231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997599110211833, + "compression_loss": 0.0, + "distillation_loss": 0.03475763648748398, + "epoch": 6.93, + "learning_rate": 1.4170007755326398e-08, + "loss": 0.0357, + "step": 7297, + "task_loss": 0.04378265514969826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799760548989792, + "compression_loss": 0.0, + "distillation_loss": 0.03861427307128906, + "epoch": 6.93, + "learning_rate": 1.3813576683111006e-08, + "loss": 0.0449, + "step": 7298, + "task_loss": 0.10126301646232605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799761185827251, + "compression_loss": 0.0, + "distillation_loss": 0.017223935574293137, + "epoch": 6.93, + "learning_rate": 1.3461684471993696e-08, + "loss": 0.0219, + "step": 7299, + "task_loss": 0.06419990956783295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799761821534564, + "compression_loss": 0.0, + "distillation_loss": 0.053291283547878265, + "epoch": 6.93, + "learning_rate": 1.3114331185898331e-08, + "loss": 0.0727, + "step": 7300, + "task_loss": 0.24761205911636353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997624561127346, + "compression_loss": 0.0, + "distillation_loss": 0.0634729415178299, + "epoch": 6.93, + "learning_rate": 1.2771516887921664e-08, + "loss": 0.0634, + "step": 7301, + "task_loss": 0.0631205290555954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997630895627664, + "compression_loss": 0.0, + "distillation_loss": 0.018962126225233078, + "epoch": 6.93, + "learning_rate": 1.2433241640338878e-08, + "loss": 0.03, + "step": 7302, + "task_loss": 0.12886041402816772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997637218856634, + "compression_loss": 0.0, + "distillation_loss": 0.16295194625854492, + "epoch": 6.94, + "learning_rate": 1.2099505504600817e-08, + "loss": 0.169, + "step": 7303, + "task_loss": 0.22337132692337036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799764353082429, + "compression_loss": 0.0, + "distillation_loss": 0.027407968416810036, + "epoch": 6.94, + "learning_rate": 1.1770308541328434e-08, + "loss": 0.0343, + "step": 7304, + "task_loss": 0.09634220600128174 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997649831540671, + "compression_loss": 0.0, + "distillation_loss": 0.023027902469038963, + "epoch": 6.94, + "learning_rate": 1.1445650810326668e-08, + "loss": 0.0246, + "step": 7305, + "task_loss": 0.038522087037563324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997656121015811, + "compression_loss": 0.0, + "distillation_loss": 0.10117096453905106, + "epoch": 6.94, + "learning_rate": 1.1125532370567793e-08, + "loss": 0.1038, + "step": 7306, + "task_loss": 0.12748871743679047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997662399259748, + "compression_loss": 0.0, + "distillation_loss": 0.029107600450515747, + "epoch": 6.94, + "learning_rate": 1.0809953280202511e-08, + "loss": 0.0362, + "step": 7307, + "task_loss": 0.09982933849096298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997668666282521, + "compression_loss": 0.0, + "distillation_loss": 0.09483398497104645, + "epoch": 6.94, + "learning_rate": 1.0498913596559967e-08, + "loss": 0.09, + "step": 7308, + "task_loss": 0.04666196554899216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997674922094165, + "compression_loss": 0.0, + "distillation_loss": 0.025720832869410515, + "epoch": 6.94, + "learning_rate": 1.0192413376139408e-08, + "loss": 0.0305, + "step": 7309, + "task_loss": 0.07334055751562119 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997681166704715, + "compression_loss": 0.0, + "distillation_loss": 0.0329585038125515, + "epoch": 6.94, + "learning_rate": 9.890452674618522e-09, + "loss": 0.0317, + "step": 7310, + "task_loss": 0.020583661273121834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799768740012421, + "compression_loss": 0.0, + "distillation_loss": 0.021503139287233353, + "epoch": 6.94, + "learning_rate": 9.593031546853426e-09, + "loss": 0.0232, + "step": 7311, + "task_loss": 0.03804744780063629 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997693622362688, + "compression_loss": 0.0, + "distillation_loss": 0.053460683673620224, + "epoch": 6.94, + "learning_rate": 9.300150046864796e-09, + "loss": 0.0505, + "step": 7312, + "task_loss": 0.02390519343316555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997699833430184, + "compression_loss": 0.0, + "distillation_loss": 0.01604924164712429, + "epoch": 6.94, + "learning_rate": 9.011808227865625e-09, + "loss": 0.0208, + "step": 7313, + "task_loss": 0.06401699781417847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997706033336734, + "compression_loss": 0.0, + "distillation_loss": 0.012479826807975769, + "epoch": 6.95, + "learning_rate": 8.728006142225131e-09, + "loss": 0.0118, + "step": 7314, + "task_loss": 0.005266919732093811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997712222092377, + "compression_loss": 0.0, + "distillation_loss": 0.05143355578184128, + "epoch": 6.95, + "learning_rate": 8.448743841504847e-09, + "loss": 0.0551, + "step": 7315, + "task_loss": 0.08826296031475067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997718399707148, + "compression_loss": 0.0, + "distillation_loss": 0.06294449418783188, + "epoch": 6.95, + "learning_rate": 8.174021376428087e-09, + "loss": 0.0601, + "step": 7316, + "task_loss": 0.03437443822622299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997724566191085, + "compression_loss": 0.0, + "distillation_loss": 0.08268649876117706, + "epoch": 6.95, + "learning_rate": 7.903838796904927e-09, + "loss": 0.0772, + "step": 7317, + "task_loss": 0.028297651559114456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997730721554224, + "compression_loss": 0.0, + "distillation_loss": 0.15433204174041748, + "epoch": 6.95, + "learning_rate": 7.638196152010002e-09, + "loss": 0.1521, + "step": 7318, + "task_loss": 0.1320251226425171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997736865806603, + "compression_loss": 0.0, + "distillation_loss": 0.018636398017406464, + "epoch": 6.95, + "learning_rate": 7.37709349000193e-09, + "loss": 0.0172, + "step": 7319, + "task_loss": 0.0046669188886880875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997742998958257, + "compression_loss": 0.0, + "distillation_loss": 0.0154360830783844, + "epoch": 6.95, + "learning_rate": 7.120530858312213e-09, + "loss": 0.0198, + "step": 7320, + "task_loss": 0.05901549756526947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997749121019225, + "compression_loss": 0.0, + "distillation_loss": 0.014429114758968353, + "epoch": 6.95, + "learning_rate": 6.8685083035452404e-09, + "loss": 0.014, + "step": 7321, + "task_loss": 0.010067546740174294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997755231999542, + "compression_loss": 0.0, + "distillation_loss": 0.025817744433879852, + "epoch": 6.95, + "learning_rate": 6.621025871481057e-09, + "loss": 0.0296, + "step": 7322, + "task_loss": 0.0636395514011383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997761331909246, + "compression_loss": 0.0, + "distillation_loss": 0.033705998212099075, + "epoch": 6.95, + "learning_rate": 6.378083607075369e-09, + "loss": 0.0338, + "step": 7323, + "task_loss": 0.03477884456515312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997767420758373, + "compression_loss": 0.0, + "distillation_loss": 0.03906092047691345, + "epoch": 6.96, + "learning_rate": 6.139681554462318e-09, + "loss": 0.0507, + "step": 7324, + "task_loss": 0.1549626588821411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997773498556959, + "compression_loss": 0.0, + "distillation_loss": 0.051231879740953445, + "epoch": 6.96, + "learning_rate": 5.905819756948927e-09, + "loss": 0.0497, + "step": 7325, + "task_loss": 0.03555578738451004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997779565315044, + "compression_loss": 0.0, + "distillation_loss": 0.03208434581756592, + "epoch": 6.96, + "learning_rate": 5.676498257015106e-09, + "loss": 0.041, + "step": 7326, + "task_loss": 0.12134852260351181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997785621042662, + "compression_loss": 0.0, + "distillation_loss": 0.1095762699842453, + "epoch": 6.96, + "learning_rate": 5.4517170963191974e-09, + "loss": 0.1053, + "step": 7327, + "task_loss": 0.06731220334768295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799779166574985, + "compression_loss": 0.0, + "distillation_loss": 0.03222663700580597, + "epoch": 6.96, + "learning_rate": 5.231476315695205e-09, + "loss": 0.0354, + "step": 7328, + "task_loss": 0.0638594701886177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997797699446646, + "compression_loss": 0.0, + "distillation_loss": 0.01883310079574585, + "epoch": 6.96, + "learning_rate": 5.015775955150015e-09, + "loss": 0.0177, + "step": 7329, + "task_loss": 0.007643511518836021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997803722143086, + "compression_loss": 0.0, + "distillation_loss": 0.018376603722572327, + "epoch": 6.96, + "learning_rate": 4.804616053866173e-09, + "loss": 0.0172, + "step": 7330, + "task_loss": 0.00702180340886116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997809733849208, + "compression_loss": 0.0, + "distillation_loss": 0.06521350890398026, + "epoch": 6.96, + "learning_rate": 4.5979966501991104e-09, + "loss": 0.0821, + "step": 7331, + "task_loss": 0.2336568832397461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997815734575047, + "compression_loss": 0.0, + "distillation_loss": 0.045030295848846436, + "epoch": 6.96, + "learning_rate": 4.395917781688242e-09, + "loss": 0.0426, + "step": 7332, + "task_loss": 0.021171823143959045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997821724330642, + "compression_loss": 0.0, + "distillation_loss": 0.06834699958562851, + "epoch": 6.96, + "learning_rate": 4.19837948503754e-09, + "loss": 0.0635, + "step": 7333, + "task_loss": 0.019571850076317787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997827703126027, + "compression_loss": 0.0, + "distillation_loss": 0.039298366755247116, + "epoch": 6.96, + "learning_rate": 4.0053817961321905e-09, + "loss": 0.0437, + "step": 7334, + "task_loss": 0.08349810540676117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799783367097124, + "compression_loss": 0.0, + "distillation_loss": 0.04764103144407272, + "epoch": 6.97, + "learning_rate": 3.8169247500330355e-09, + "loss": 0.0447, + "step": 7335, + "task_loss": 0.01831081695854664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799783962787632, + "compression_loss": 0.0, + "distillation_loss": 0.05844502151012421, + "epoch": 6.97, + "learning_rate": 3.633008380971026e-09, + "loss": 0.0784, + "step": 7336, + "task_loss": 0.25827866792678833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997845573851301, + "compression_loss": 0.0, + "distillation_loss": 0.03211439028382301, + "epoch": 6.97, + "learning_rate": 3.453632722358324e-09, + "loss": 0.0347, + "step": 7337, + "task_loss": 0.058454521000385284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997851508906221, + "compression_loss": 0.0, + "distillation_loss": 0.021087612956762314, + "epoch": 6.97, + "learning_rate": 3.278797806774425e-09, + "loss": 0.0199, + "step": 7338, + "task_loss": 0.009137500077486038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997857433051117, + "compression_loss": 0.0, + "distillation_loss": 0.07293461263179779, + "epoch": 6.97, + "learning_rate": 3.1085036659855847e-09, + "loss": 0.0815, + "step": 7339, + "task_loss": 0.15890468657016754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997863346296024, + "compression_loss": 0.0, + "distillation_loss": 0.04177660495042801, + "epoch": 6.97, + "learning_rate": 2.9427503309226166e-09, + "loss": 0.0499, + "step": 7340, + "task_loss": 0.12267521023750305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997869248650982, + "compression_loss": 0.0, + "distillation_loss": 0.06666393578052521, + "epoch": 6.97, + "learning_rate": 2.7815378316947694e-09, + "loss": 0.0758, + "step": 7341, + "task_loss": 0.15778383612632751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997875140126025, + "compression_loss": 0.0, + "distillation_loss": 0.04870596528053284, + "epoch": 6.97, + "learning_rate": 2.624866197589726e-09, + "loss": 0.046, + "step": 7342, + "task_loss": 0.021872013807296753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997881020731191, + "compression_loss": 0.0, + "distillation_loss": 0.02343062311410904, + "epoch": 6.97, + "learning_rate": 2.4727354570680537e-09, + "loss": 0.0323, + "step": 7343, + "task_loss": 0.11193156987428665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997886890476517, + "compression_loss": 0.0, + "distillation_loss": 0.0678223967552185, + "epoch": 6.97, + "learning_rate": 2.3251456377604288e-09, + "loss": 0.0726, + "step": 7344, + "task_loss": 0.1155007928609848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799789274937204, + "compression_loss": 0.0, + "distillation_loss": 0.11146111786365509, + "epoch": 6.98, + "learning_rate": 2.1820967664815116e-09, + "loss": 0.1153, + "step": 7345, + "task_loss": 0.15025877952575684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997898597427796, + "compression_loss": 0.0, + "distillation_loss": 0.058320388197898865, + "epoch": 6.98, + "learning_rate": 2.043588869216073e-09, + "loss": 0.0701, + "step": 7346, + "task_loss": 0.17589399218559265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997904434653822, + "compression_loss": 0.0, + "distillation_loss": 0.03350623697042465, + "epoch": 6.98, + "learning_rate": 1.9096219711245423e-09, + "loss": 0.0307, + "step": 7347, + "task_loss": 0.005610005930066109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997910261060155, + "compression_loss": 0.0, + "distillation_loss": 0.02051948755979538, + "epoch": 6.98, + "learning_rate": 1.780196096540232e-09, + "loss": 0.0246, + "step": 7348, + "task_loss": 0.06097707524895668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997916076656832, + "compression_loss": 0.0, + "distillation_loss": 0.021704774349927902, + "epoch": 6.98, + "learning_rate": 1.6553112689776662e-09, + "loss": 0.0294, + "step": 7349, + "task_loss": 0.0981798768043518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799792188145389, + "compression_loss": 0.0, + "distillation_loss": 0.10756382346153259, + "epoch": 6.98, + "learning_rate": 1.5349675111214768e-09, + "loss": 0.1035, + "step": 7350, + "task_loss": 0.06680326163768768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997927675461366, + "compression_loss": 0.0, + "distillation_loss": 0.06751997768878937, + "epoch": 6.98, + "learning_rate": 1.419164844831955e-09, + "loss": 0.0741, + "step": 7351, + "task_loss": 0.13287892937660217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997933458689296, + "compression_loss": 0.0, + "distillation_loss": 0.028206095099449158, + "epoch": 6.98, + "learning_rate": 1.3079032911450516e-09, + "loss": 0.0329, + "step": 7352, + "task_loss": 0.07551902532577515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997939231147716, + "compression_loss": 0.0, + "distillation_loss": 0.0961083471775055, + "epoch": 6.98, + "learning_rate": 1.201182870272377e-09, + "loss": 0.0902, + "step": 7353, + "task_loss": 0.03751200810074806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997944992846665, + "compression_loss": 0.0, + "distillation_loss": 0.01709740236401558, + "epoch": 6.98, + "learning_rate": 1.0990036016012005e-09, + "loss": 0.0161, + "step": 7354, + "task_loss": 0.00756429135799408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997950743796178, + "compression_loss": 0.0, + "distillation_loss": 0.024914680048823357, + "epoch": 6.98, + "learning_rate": 1.0013655036916759e-09, + "loss": 0.0318, + "step": 7355, + "task_loss": 0.094090037047863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997956484006293, + "compression_loss": 0.0, + "distillation_loss": 0.013773511163890362, + "epoch": 6.99, + "learning_rate": 9.082685942796154e-10, + "loss": 0.0129, + "step": 7356, + "task_loss": 0.004718998447060585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997962213487048, + "compression_loss": 0.0, + "distillation_loss": 0.020373770967125893, + "epoch": 6.99, + "learning_rate": 8.197128902792672e-10, + "loss": 0.0326, + "step": 7357, + "task_loss": 0.1427568942308426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997967932248476, + "compression_loss": 0.0, + "distillation_loss": 0.08715569972991943, + "epoch": 6.99, + "learning_rate": 7.356984077722117e-10, + "loss": 0.0883, + "step": 7358, + "task_loss": 0.09825003892183304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997973640300617, + "compression_loss": 0.0, + "distillation_loss": 0.03974455967545509, + "epoch": 6.99, + "learning_rate": 6.562251620267912e-10, + "loss": 0.0361, + "step": 7359, + "task_loss": 0.0036314092576503754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997979337653507, + "compression_loss": 0.0, + "distillation_loss": 0.10460391640663147, + "epoch": 6.99, + "learning_rate": 5.812931674759048e-10, + "loss": 0.1047, + "step": 7360, + "task_loss": 0.10604080557823181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997985024317182, + "compression_loss": 0.0, + "distillation_loss": 0.020527970045804977, + "epoch": 6.99, + "learning_rate": 5.109024377308869e-10, + "loss": 0.0209, + "step": 7361, + "task_loss": 0.02422548085451126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799799070030168, + "compression_loss": 0.0, + "distillation_loss": 0.09669515490531921, + "epoch": 6.99, + "learning_rate": 4.450529855787311e-10, + "loss": 0.0906, + "step": 7362, + "task_loss": 0.03620155528187752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7997996365617038, + "compression_loss": 0.0, + "distillation_loss": 0.026915445923805237, + "epoch": 6.99, + "learning_rate": 3.8374482298209057e-10, + "loss": 0.0248, + "step": 7363, + "task_loss": 0.005302935838699341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998002020273292, + "compression_loss": 0.0, + "distillation_loss": 0.028947005048394203, + "epoch": 6.99, + "learning_rate": 3.2697796107650226e-10, + "loss": 0.0354, + "step": 7364, + "task_loss": 0.09320739656686783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998007664280478, + "compression_loss": 0.0, + "distillation_loss": 0.013993775472044945, + "epoch": 6.99, + "learning_rate": 2.7475241017871355e-10, + "loss": 0.0266, + "step": 7365, + "task_loss": 0.1398152858018875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998013297648634, + "compression_loss": 0.0, + "distillation_loss": 0.0770372524857521, + "epoch": 7.0, + "learning_rate": 2.2706817977002914e-10, + "loss": 0.0781, + "step": 7366, + "task_loss": 0.0880659967660904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998018920387797, + "compression_loss": 0.0, + "distillation_loss": 0.052641429007053375, + "epoch": 7.0, + "learning_rate": 1.8392527851296415e-10, + "loss": 0.0551, + "step": 7367, + "task_loss": 0.0774116963148117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998024532508003, + "compression_loss": 0.0, + "distillation_loss": 0.05743606388568878, + "epoch": 7.0, + "learning_rate": 1.453237142484687e-10, + "loss": 0.0612, + "step": 7368, + "task_loss": 0.09504348039627075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.799803013401929, + "compression_loss": 0.0, + "distillation_loss": 0.026639528572559357, + "epoch": 7.0, + "learning_rate": 1.112634939848256e-10, + "loss": 0.0321, + "step": 7369, + "task_loss": 0.08151736855506897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998035724931695, + "compression_loss": 0.0, + "distillation_loss": 0.03500088304281235, + "epoch": 7.0, + "learning_rate": 8.174462391430382e-11, + "loss": 0.0384, + "step": 7370, + "task_loss": 0.06867900490760803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998041305255252, + "compression_loss": 0.0, + "distillation_loss": 0.012465332634747028, + "epoch": 7.0, + "learning_rate": 5.676710939372942e-11, + "loss": 0.0127, + "step": 7371, + "task_loss": 0.014598931185901165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998046875, + "compression_loss": 0.0, + "distillation_loss": 0.09104140102863312, + "epoch": 7.0, + "learning_rate": 3.633095496113903e-11, + "loss": 0.0992, + "step": 7372, + "task_loss": 0.17217549681663513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998052434175976, + "compression_loss": 0.0, + "distillation_loss": 0.07841981947422028, + "epoch": 7.0, + "learning_rate": 2.0436164330228658e-11, + "loss": 0.0708, + "step": 7373, + "task_loss": 0.0018375962972640991 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998057982793217, + "compression_loss": 0.0, + "distillation_loss": 0.08232280611991882, + "epoch": 7.0, + "learning_rate": 9.082740390353728e-12, + "loss": 0.0767, + "step": 7374, + "task_loss": 0.026533475145697594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998063520861759, + "compression_loss": 0.0, + "distillation_loss": 0.07200402021408081, + "epoch": 7.0, + "learning_rate": 2.270685200977951e-12, + "loss": 0.0776, + "step": 7375, + "task_loss": 0.12824085354804993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998069048391638, + "compression_loss": 0.0, + "distillation_loss": 0.19095724821090698, + "epoch": 7.0, + "learning_rate": 5e-05, + "loss": 0.1839, + "step": 7376, + "task_loss": 0.12048640847206116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998074565392893, + "compression_loss": 0.0, + "distillation_loss": 0.04641976207494736, + "epoch": 7.01, + "learning_rate": 4.99999977293148e-05, + "loss": 0.05, + "step": 7377, + "task_loss": 0.0825609490275383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998080071875558, + "compression_loss": 0.0, + "distillation_loss": 0.1098412275314331, + "epoch": 7.01, + "learning_rate": 4.999999091725961e-05, + "loss": 0.111, + "step": 7378, + "task_loss": 0.12164577841758728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998085567849673, + "compression_loss": 0.0, + "distillation_loss": 0.07424164563417435, + "epoch": 7.01, + "learning_rate": 4.999997956383567e-05, + "loss": 0.0833, + "step": 7379, + "task_loss": 0.16513806581497192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998091053325274, + "compression_loss": 0.0, + "distillation_loss": 0.08631199598312378, + "epoch": 7.01, + "learning_rate": 4.999996366904504e-05, + "loss": 0.0779, + "step": 7380, + "task_loss": 0.002591833472251892 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998096528312395, + "compression_loss": 0.0, + "distillation_loss": 0.09567269682884216, + "epoch": 7.01, + "learning_rate": 4.999994323289061e-05, + "loss": 0.0913, + "step": 7381, + "task_loss": 0.05151829496026039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998101992821076, + "compression_loss": 0.0, + "distillation_loss": 0.0753033235669136, + "epoch": 7.01, + "learning_rate": 4.999991825537609e-05, + "loss": 0.0694, + "step": 7382, + "task_loss": 0.016505436971783638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998107446861353, + "compression_loss": 0.0, + "distillation_loss": 0.038341403007507324, + "epoch": 7.01, + "learning_rate": 4.999988873650602e-05, + "loss": 0.0354, + "step": 7383, + "task_loss": 0.008481014519929886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998112890443263, + "compression_loss": 0.0, + "distillation_loss": 0.04590607061982155, + "epoch": 7.01, + "learning_rate": 4.999985467628575e-05, + "loss": 0.0532, + "step": 7384, + "task_loss": 0.11850175261497498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998118323576842, + "compression_loss": 0.0, + "distillation_loss": 0.06865381449460983, + "epoch": 7.01, + "learning_rate": 4.999981607472149e-05, + "loss": 0.0665, + "step": 7385, + "task_loss": 0.046819057315588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998123746272127, + "compression_loss": 0.0, + "distillation_loss": 0.06713886559009552, + "epoch": 7.01, + "learning_rate": 4.999977293182023e-05, + "loss": 0.0622, + "step": 7386, + "task_loss": 0.018027808517217636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998129158539155, + "compression_loss": 0.0, + "distillation_loss": 0.03523305803537369, + "epoch": 7.02, + "learning_rate": 4.999972524758982e-05, + "loss": 0.0459, + "step": 7387, + "task_loss": 0.14175420999526978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998134560387963, + "compression_loss": 0.0, + "distillation_loss": 0.10418183356523514, + "epoch": 7.02, + "learning_rate": 4.999967302203893e-05, + "loss": 0.106, + "step": 7388, + "task_loss": 0.12198633700609207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998139951828587, + "compression_loss": 0.0, + "distillation_loss": 0.02372100204229355, + "epoch": 7.02, + "learning_rate": 4.9999616255177016e-05, + "loss": 0.0308, + "step": 7389, + "task_loss": 0.09421825408935547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998145332871065, + "compression_loss": 0.0, + "distillation_loss": 0.028630398213863373, + "epoch": 7.02, + "learning_rate": 4.999955494701443e-05, + "loss": 0.0346, + "step": 7390, + "task_loss": 0.08789223432540894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998150703525434, + "compression_loss": 0.0, + "distillation_loss": 0.14551189541816711, + "epoch": 7.02, + "learning_rate": 4.999948909756227e-05, + "loss": 0.1518, + "step": 7391, + "task_loss": 0.20824161171913147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998156063801729, + "compression_loss": 0.0, + "distillation_loss": 0.13446444272994995, + "epoch": 7.02, + "learning_rate": 4.9999418706832525e-05, + "loss": 0.1363, + "step": 7392, + "task_loss": 0.15303920209407806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998161413709989, + "compression_loss": 0.0, + "distillation_loss": 0.025591343641281128, + "epoch": 7.02, + "learning_rate": 4.9999343774837976e-05, + "loss": 0.0238, + "step": 7393, + "task_loss": 0.007202155888080597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799816675326025, + "compression_loss": 0.0, + "distillation_loss": 0.03282269090414047, + "epoch": 7.02, + "learning_rate": 4.999926430159223e-05, + "loss": 0.031, + "step": 7394, + "task_loss": 0.014277882874011993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998172082462548, + "compression_loss": 0.0, + "distillation_loss": 0.05018794909119606, + "epoch": 7.02, + "learning_rate": 4.9999180287109725e-05, + "loss": 0.0654, + "step": 7395, + "task_loss": 0.20274071395397186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799817740132692, + "compression_loss": 0.0, + "distillation_loss": 0.027372337877750397, + "epoch": 7.02, + "learning_rate": 4.999909173140572e-05, + "loss": 0.0256, + "step": 7396, + "task_loss": 0.01010635495185852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998182709863404, + "compression_loss": 0.0, + "distillation_loss": 0.02192852459847927, + "epoch": 7.02, + "learning_rate": 4.999899863449631e-05, + "loss": 0.0256, + "step": 7397, + "task_loss": 0.05868425592780113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998188008082036, + "compression_loss": 0.0, + "distillation_loss": 0.06173873692750931, + "epoch": 7.03, + "learning_rate": 4.99989009963984e-05, + "loss": 0.0675, + "step": 7398, + "task_loss": 0.1194574236869812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998193295992853, + "compression_loss": 0.0, + "distillation_loss": 0.06959561258554459, + "epoch": 7.03, + "learning_rate": 4.999879881712973e-05, + "loss": 0.0697, + "step": 7399, + "task_loss": 0.07095710933208466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998198573605891, + "compression_loss": 0.0, + "distillation_loss": 0.08823947608470917, + "epoch": 7.03, + "learning_rate": 4.999869209670885e-05, + "loss": 0.0889, + "step": 7400, + "task_loss": 0.0944376289844513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998203840931188, + "compression_loss": 0.0, + "distillation_loss": 0.03558321297168732, + "epoch": 7.03, + "learning_rate": 4.999858083515517e-05, + "loss": 0.0466, + "step": 7401, + "task_loss": 0.14564195275306702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799820909797878, + "compression_loss": 0.0, + "distillation_loss": 0.1047942191362381, + "epoch": 7.03, + "learning_rate": 4.999846503248888e-05, + "loss": 0.1106, + "step": 7402, + "task_loss": 0.16298425197601318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998214344758705, + "compression_loss": 0.0, + "distillation_loss": 0.03409132733941078, + "epoch": 7.03, + "learning_rate": 4.9998344688731027e-05, + "loss": 0.0435, + "step": 7403, + "task_loss": 0.12837141752243042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998219581280999, + "compression_loss": 0.0, + "distillation_loss": 0.025968341156840324, + "epoch": 7.03, + "learning_rate": 4.999821980390346e-05, + "loss": 0.0237, + "step": 7404, + "task_loss": 0.003702618181705475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998224807555698, + "compression_loss": 0.0, + "distillation_loss": 0.04202386364340782, + "epoch": 7.03, + "learning_rate": 4.999809037802888e-05, + "loss": 0.0391, + "step": 7405, + "task_loss": 0.013203632086515427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799823002359284, + "compression_loss": 0.0, + "distillation_loss": 0.020798005163669586, + "epoch": 7.03, + "learning_rate": 4.999795641113079e-05, + "loss": 0.0203, + "step": 7406, + "task_loss": 0.016108643263578415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998235229402462, + "compression_loss": 0.0, + "distillation_loss": 0.04067230597138405, + "epoch": 7.03, + "learning_rate": 4.9997817903233527e-05, + "loss": 0.0412, + "step": 7407, + "task_loss": 0.04560984671115875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998240424994599, + "compression_loss": 0.0, + "distillation_loss": 0.03840542584657669, + "epoch": 7.04, + "learning_rate": 4.999767485436224e-05, + "loss": 0.0402, + "step": 7408, + "task_loss": 0.05598912388086319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998245610379291, + "compression_loss": 0.0, + "distillation_loss": 0.028701873496174812, + "epoch": 7.04, + "learning_rate": 4.999752726454293e-05, + "loss": 0.0274, + "step": 7409, + "task_loss": 0.015496550127863884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998250785566572, + "compression_loss": 0.0, + "distillation_loss": 0.03353622183203697, + "epoch": 7.04, + "learning_rate": 4.9997375133802415e-05, + "loss": 0.0376, + "step": 7410, + "task_loss": 0.07444252073764801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799825595056648, + "compression_loss": 0.0, + "distillation_loss": 0.021153662353754044, + "epoch": 7.04, + "learning_rate": 4.999721846216831e-05, + "loss": 0.0196, + "step": 7411, + "task_loss": 0.005829468369483948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998261105389051, + "compression_loss": 0.0, + "distillation_loss": 0.059076711535453796, + "epoch": 7.04, + "learning_rate": 4.999705724966908e-05, + "loss": 0.0687, + "step": 7412, + "task_loss": 0.15493744611740112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998266250044322, + "compression_loss": 0.0, + "distillation_loss": 0.06167703866958618, + "epoch": 7.04, + "learning_rate": 4.999689149633402e-05, + "loss": 0.0608, + "step": 7413, + "task_loss": 0.05248799920082092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799827138454233, + "compression_loss": 0.0, + "distillation_loss": 0.02228119783103466, + "epoch": 7.04, + "learning_rate": 4.999672120219323e-05, + "loss": 0.0276, + "step": 7414, + "task_loss": 0.07509797811508179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998276508893114, + "compression_loss": 0.0, + "distillation_loss": 0.03827614709734917, + "epoch": 7.04, + "learning_rate": 4.999654636727764e-05, + "loss": 0.0452, + "step": 7415, + "task_loss": 0.10720621794462204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998281623106708, + "compression_loss": 0.0, + "distillation_loss": 0.10008041560649872, + "epoch": 7.04, + "learning_rate": 4.9996366991619034e-05, + "loss": 0.0968, + "step": 7416, + "task_loss": 0.06756041944026947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998286727193149, + "compression_loss": 0.0, + "distillation_loss": 0.04735985025763512, + "epoch": 7.04, + "learning_rate": 4.999618307524997e-05, + "loss": 0.0456, + "step": 7417, + "task_loss": 0.02975446730852127 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998291821162474, + "compression_loss": 0.0, + "distillation_loss": 0.02187761291861534, + "epoch": 7.04, + "learning_rate": 4.999599461820387e-05, + "loss": 0.0201, + "step": 7418, + "task_loss": 0.0042492058128118515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998296905024722, + "compression_loss": 0.0, + "distillation_loss": 0.020126711577177048, + "epoch": 7.05, + "learning_rate": 4.999580162051497e-05, + "loss": 0.0187, + "step": 7419, + "task_loss": 0.005694573745131493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998301978789928, + "compression_loss": 0.0, + "distillation_loss": 0.02660163678228855, + "epoch": 7.05, + "learning_rate": 4.9995604082218314e-05, + "loss": 0.0327, + "step": 7420, + "task_loss": 0.08736631274223328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998307042468128, + "compression_loss": 0.0, + "distillation_loss": 0.05452951416373253, + "epoch": 7.05, + "learning_rate": 4.99954020033498e-05, + "loss": 0.0549, + "step": 7421, + "task_loss": 0.058631766587495804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799831209606936, + "compression_loss": 0.0, + "distillation_loss": 0.0896400734782219, + "epoch": 7.05, + "learning_rate": 4.9995195383946135e-05, + "loss": 0.0861, + "step": 7422, + "task_loss": 0.053857218474149704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998317139603661, + "compression_loss": 0.0, + "distillation_loss": 0.026753665879368782, + "epoch": 7.05, + "learning_rate": 4.999498422404485e-05, + "loss": 0.0279, + "step": 7423, + "task_loss": 0.03794853016734123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998322173081067, + "compression_loss": 0.0, + "distillation_loss": 0.023596402257680893, + "epoch": 7.05, + "learning_rate": 4.999476852368431e-05, + "loss": 0.0243, + "step": 7424, + "task_loss": 0.030923640355467796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998327196511615, + "compression_loss": 0.0, + "distillation_loss": 0.060296621173620224, + "epoch": 7.05, + "learning_rate": 4.999454828290369e-05, + "loss": 0.0597, + "step": 7425, + "task_loss": 0.05454102158546448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998332209905342, + "compression_loss": 0.0, + "distillation_loss": 0.0592413991689682, + "epoch": 7.05, + "learning_rate": 4.999432350174299e-05, + "loss": 0.0575, + "step": 7426, + "task_loss": 0.04156405106186867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998337213272286, + "compression_loss": 0.0, + "distillation_loss": 0.07152185589075089, + "epoch": 7.05, + "learning_rate": 4.9994094180243055e-05, + "loss": 0.0712, + "step": 7427, + "task_loss": 0.0684933215379715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998342206622482, + "compression_loss": 0.0, + "distillation_loss": 0.0811118632555008, + "epoch": 7.05, + "learning_rate": 4.999386031844554e-05, + "loss": 0.0814, + "step": 7428, + "task_loss": 0.08375652134418488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998347189965967, + "compression_loss": 0.0, + "distillation_loss": 0.0416887030005455, + "epoch": 7.06, + "learning_rate": 4.999362191639293e-05, + "loss": 0.0431, + "step": 7429, + "task_loss": 0.05558731034398079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799835216331278, + "compression_loss": 0.0, + "distillation_loss": 0.04246686398983002, + "epoch": 7.06, + "learning_rate": 4.999337897412852e-05, + "loss": 0.0439, + "step": 7430, + "task_loss": 0.05704366788268089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998357126672955, + "compression_loss": 0.0, + "distillation_loss": 0.028290575370192528, + "epoch": 7.06, + "learning_rate": 4.999313149169645e-05, + "loss": 0.0431, + "step": 7431, + "task_loss": 0.17606376111507416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799836208005653, + "compression_loss": 0.0, + "distillation_loss": 0.08979344367980957, + "epoch": 7.06, + "learning_rate": 4.999287946914169e-05, + "loss": 0.0868, + "step": 7432, + "task_loss": 0.06011103093624115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998367023473542, + "compression_loss": 0.0, + "distillation_loss": 0.06186497583985329, + "epoch": 7.06, + "learning_rate": 4.999262290651e-05, + "loss": 0.0586, + "step": 7433, + "task_loss": 0.02930166944861412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998371956934027, + "compression_loss": 0.0, + "distillation_loss": 0.022470489144325256, + "epoch": 7.06, + "learning_rate": 4.9992361803847995e-05, + "loss": 0.0208, + "step": 7434, + "task_loss": 0.005419734865427017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998376880448024, + "compression_loss": 0.0, + "distillation_loss": 0.056562915444374084, + "epoch": 7.06, + "learning_rate": 4.99920961612031e-05, + "loss": 0.0685, + "step": 7435, + "task_loss": 0.17543764412403107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998381794025567, + "compression_loss": 0.0, + "distillation_loss": 0.05138598382472992, + "epoch": 7.06, + "learning_rate": 4.9991825978623574e-05, + "loss": 0.0542, + "step": 7436, + "task_loss": 0.07962413132190704 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998386697676695, + "compression_loss": 0.0, + "distillation_loss": 0.015319476835429668, + "epoch": 7.06, + "learning_rate": 4.9991551256158495e-05, + "loss": 0.0277, + "step": 7437, + "task_loss": 0.13944551348686218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998391591411443, + "compression_loss": 0.0, + "distillation_loss": 0.01975143700838089, + "epoch": 7.06, + "learning_rate": 4.999127199385778e-05, + "loss": 0.0257, + "step": 7438, + "task_loss": 0.07912784814834595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799839647523985, + "compression_loss": 0.0, + "distillation_loss": 0.07420433312654495, + "epoch": 7.06, + "learning_rate": 4.999098819177214e-05, + "loss": 0.0816, + "step": 7439, + "task_loss": 0.14786875247955322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799840134917195, + "compression_loss": 0.0, + "distillation_loss": 0.027095727622509003, + "epoch": 7.07, + "learning_rate": 4.999069984995314e-05, + "loss": 0.0319, + "step": 7440, + "task_loss": 0.07478499412536621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998406213217781, + "compression_loss": 0.0, + "distillation_loss": 0.04266301542520523, + "epoch": 7.07, + "learning_rate": 4.999040696845315e-05, + "loss": 0.0465, + "step": 7441, + "task_loss": 0.08147358894348145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998411067387382, + "compression_loss": 0.0, + "distillation_loss": 0.021399151533842087, + "epoch": 7.07, + "learning_rate": 4.999010954732538e-05, + "loss": 0.026, + "step": 7442, + "task_loss": 0.06721732020378113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998415911690787, + "compression_loss": 0.0, + "distillation_loss": 0.04915117844939232, + "epoch": 7.07, + "learning_rate": 4.998980758662386e-05, + "loss": 0.0452, + "step": 7443, + "task_loss": 0.009514054283499718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998420746138034, + "compression_loss": 0.0, + "distillation_loss": 0.08443302661180496, + "epoch": 7.07, + "learning_rate": 4.998950108640345e-05, + "loss": 0.0882, + "step": 7444, + "task_loss": 0.12241680175065994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799842557073916, + "compression_loss": 0.0, + "distillation_loss": 0.03232605382800102, + "epoch": 7.07, + "learning_rate": 4.99891900467198e-05, + "loss": 0.0313, + "step": 7445, + "task_loss": 0.022218016907572746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998430385504202, + "compression_loss": 0.0, + "distillation_loss": 0.032280419021844864, + "epoch": 7.07, + "learning_rate": 4.9988874467629435e-05, + "loss": 0.0345, + "step": 7446, + "task_loss": 0.05450471490621567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998435190443195, + "compression_loss": 0.0, + "distillation_loss": 0.027930978685617447, + "epoch": 7.07, + "learning_rate": 4.998855434918968e-05, + "loss": 0.0262, + "step": 7447, + "task_loss": 0.010617373511195183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998439985566178, + "compression_loss": 0.0, + "distillation_loss": 0.06132368743419647, + "epoch": 7.07, + "learning_rate": 4.998822969145868e-05, + "loss": 0.0739, + "step": 7448, + "task_loss": 0.18746685981750488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998444770883186, + "compression_loss": 0.0, + "distillation_loss": 0.03330961614847183, + "epoch": 7.07, + "learning_rate": 4.99879004944954e-05, + "loss": 0.0313, + "step": 7449, + "task_loss": 0.012728353962302208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998449546404258, + "compression_loss": 0.0, + "distillation_loss": 0.018877487629652023, + "epoch": 7.08, + "learning_rate": 4.998756675835966e-05, + "loss": 0.0173, + "step": 7450, + "task_loss": 0.0031468812376260757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998454312139429, + "compression_loss": 0.0, + "distillation_loss": 0.08477997779846191, + "epoch": 7.08, + "learning_rate": 4.9987228483112083e-05, + "loss": 0.0814, + "step": 7451, + "task_loss": 0.05088125914335251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998459068098737, + "compression_loss": 0.0, + "distillation_loss": 0.05044269561767578, + "epoch": 7.08, + "learning_rate": 4.998688566881411e-05, + "loss": 0.0471, + "step": 7452, + "task_loss": 0.016654808074235916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998463814292217, + "compression_loss": 0.0, + "distillation_loss": 0.04332347959280014, + "epoch": 7.08, + "learning_rate": 4.998653831552801e-05, + "loss": 0.0424, + "step": 7453, + "task_loss": 0.034489430487155914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998468550729908, + "compression_loss": 0.0, + "distillation_loss": 0.09742948412895203, + "epoch": 7.08, + "learning_rate": 4.998618642331689e-05, + "loss": 0.1015, + "step": 7454, + "task_loss": 0.13810521364212036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998473277421846, + "compression_loss": 0.0, + "distillation_loss": 0.06396178901195526, + "epoch": 7.08, + "learning_rate": 4.9985829992244675e-05, + "loss": 0.0684, + "step": 7455, + "task_loss": 0.10790328681468964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998477994378067, + "compression_loss": 0.0, + "distillation_loss": 0.04675624519586563, + "epoch": 7.08, + "learning_rate": 4.998546902237611e-05, + "loss": 0.0458, + "step": 7456, + "task_loss": 0.03740784153342247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998482701608609, + "compression_loss": 0.0, + "distillation_loss": 0.018789753317832947, + "epoch": 7.08, + "learning_rate": 4.9985103513776764e-05, + "loss": 0.0245, + "step": 7457, + "task_loss": 0.07571886479854584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998487399123507, + "compression_loss": 0.0, + "distillation_loss": 0.06566867977380753, + "epoch": 7.08, + "learning_rate": 4.998473346651303e-05, + "loss": 0.0631, + "step": 7458, + "task_loss": 0.03988581523299217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998492086932801, + "compression_loss": 0.0, + "distillation_loss": 0.03311315178871155, + "epoch": 7.08, + "learning_rate": 4.9984358880652146e-05, + "loss": 0.0408, + "step": 7459, + "task_loss": 0.1103094071149826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998496765046526, + "compression_loss": 0.0, + "distillation_loss": 0.11287416517734528, + "epoch": 7.08, + "learning_rate": 4.9983979756262136e-05, + "loss": 0.1219, + "step": 7460, + "task_loss": 0.20322994887828827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998501433474717, + "compression_loss": 0.0, + "distillation_loss": 0.026843059808015823, + "epoch": 7.09, + "learning_rate": 4.998359609341188e-05, + "loss": 0.037, + "step": 7461, + "task_loss": 0.12811830639839172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998506092227414, + "compression_loss": 0.0, + "distillation_loss": 0.06953533738851547, + "epoch": 7.09, + "learning_rate": 4.9983207892171074e-05, + "loss": 0.0673, + "step": 7462, + "task_loss": 0.04702718183398247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998510741314653, + "compression_loss": 0.0, + "distillation_loss": 0.05555496737360954, + "epoch": 7.09, + "learning_rate": 4.998281515261023e-05, + "loss": 0.0607, + "step": 7463, + "task_loss": 0.10690201073884964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998515380746469, + "compression_loss": 0.0, + "distillation_loss": 0.025969993323087692, + "epoch": 7.09, + "learning_rate": 4.9982417874800704e-05, + "loss": 0.0266, + "step": 7464, + "task_loss": 0.03247838839888573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79985200105329, + "compression_loss": 0.0, + "distillation_loss": 0.057906825095415115, + "epoch": 7.09, + "learning_rate": 4.998201605881465e-05, + "loss": 0.0635, + "step": 7465, + "task_loss": 0.1140889823436737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998524630683984, + "compression_loss": 0.0, + "distillation_loss": 0.019549962133169174, + "epoch": 7.09, + "learning_rate": 4.9981609704725057e-05, + "loss": 0.0261, + "step": 7466, + "task_loss": 0.08523699641227722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998529241209756, + "compression_loss": 0.0, + "distillation_loss": 0.14493508636951447, + "epoch": 7.09, + "learning_rate": 4.998119881260576e-05, + "loss": 0.1549, + "step": 7467, + "task_loss": 0.244978129863739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998533842120253, + "compression_loss": 0.0, + "distillation_loss": 0.10177647322416306, + "epoch": 7.09, + "learning_rate": 4.9980783382531376e-05, + "loss": 0.1153, + "step": 7468, + "task_loss": 0.23712855577468872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998538433425514, + "compression_loss": 0.0, + "distillation_loss": 0.0323081910610199, + "epoch": 7.09, + "learning_rate": 4.998036341457739e-05, + "loss": 0.0372, + "step": 7469, + "task_loss": 0.08089584857225418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998543015135573, + "compression_loss": 0.0, + "distillation_loss": 0.045823805034160614, + "epoch": 7.09, + "learning_rate": 4.997993890882008e-05, + "loss": 0.0499, + "step": 7470, + "task_loss": 0.08696474879980087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998547587260468, + "compression_loss": 0.0, + "distillation_loss": 0.03767596185207367, + "epoch": 7.09, + "learning_rate": 4.997950986533656e-05, + "loss": 0.0433, + "step": 7471, + "task_loss": 0.0938701331615448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998552149810236, + "compression_loss": 0.0, + "distillation_loss": 0.040337882936000824, + "epoch": 7.1, + "learning_rate": 4.997907628420477e-05, + "loss": 0.0532, + "step": 7472, + "task_loss": 0.16945821046829224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998556702794914, + "compression_loss": 0.0, + "distillation_loss": 0.0397014319896698, + "epoch": 7.1, + "learning_rate": 4.9978638165503475e-05, + "loss": 0.0405, + "step": 7473, + "task_loss": 0.04727710038423538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998561246224538, + "compression_loss": 0.0, + "distillation_loss": 0.04514710605144501, + "epoch": 7.1, + "learning_rate": 4.9978195509312266e-05, + "loss": 0.061, + "step": 7474, + "task_loss": 0.20394141972064972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998565780109146, + "compression_loss": 0.0, + "distillation_loss": 0.13967446982860565, + "epoch": 7.1, + "learning_rate": 4.997774831571154e-05, + "loss": 0.143, + "step": 7475, + "task_loss": 0.17266049981117249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998570304458774, + "compression_loss": 0.0, + "distillation_loss": 0.043175503611564636, + "epoch": 7.1, + "learning_rate": 4.9977296584782544e-05, + "loss": 0.0402, + "step": 7476, + "task_loss": 0.013554053381085396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998574819283458, + "compression_loss": 0.0, + "distillation_loss": 0.024114977568387985, + "epoch": 7.1, + "learning_rate": 4.997684031660732e-05, + "loss": 0.0293, + "step": 7477, + "task_loss": 0.07583250850439072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998579324593237, + "compression_loss": 0.0, + "distillation_loss": 0.04103930667042732, + "epoch": 7.1, + "learning_rate": 4.997637951126877e-05, + "loss": 0.0476, + "step": 7478, + "task_loss": 0.1067778468132019 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998583820398146, + "compression_loss": 0.0, + "distillation_loss": 0.10217143595218658, + "epoch": 7.1, + "learning_rate": 4.997591416885059e-05, + "loss": 0.1005, + "step": 7479, + "task_loss": 0.08553251624107361 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998588306708222, + "compression_loss": 0.0, + "distillation_loss": 0.021832741796970367, + "epoch": 7.1, + "learning_rate": 4.997544428943732e-05, + "loss": 0.0202, + "step": 7480, + "task_loss": 0.005031948909163475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998592783533502, + "compression_loss": 0.0, + "distillation_loss": 0.024123501032590866, + "epoch": 7.1, + "learning_rate": 4.997496987311431e-05, + "loss": 0.0222, + "step": 7481, + "task_loss": 0.0046864766627550125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998597250884024, + "compression_loss": 0.0, + "distillation_loss": 0.01681506633758545, + "epoch": 7.11, + "learning_rate": 4.997449091996774e-05, + "loss": 0.023, + "step": 7482, + "task_loss": 0.0789770558476448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998601708769824, + "compression_loss": 0.0, + "distillation_loss": 0.031241536140441895, + "epoch": 7.11, + "learning_rate": 4.9974007430084617e-05, + "loss": 0.0407, + "step": 7483, + "task_loss": 0.12625262141227722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998606157200938, + "compression_loss": 0.0, + "distillation_loss": 0.05506356433033943, + "epoch": 7.11, + "learning_rate": 4.997351940355277e-05, + "loss": 0.0503, + "step": 7484, + "task_loss": 0.0069837626069784164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998610596187403, + "compression_loss": 0.0, + "distillation_loss": 0.02426174283027649, + "epoch": 7.11, + "learning_rate": 4.997302684046085e-05, + "loss": 0.0417, + "step": 7485, + "task_loss": 0.19815370440483093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998615025739257, + "compression_loss": 0.0, + "distillation_loss": 0.014478189870715141, + "epoch": 7.11, + "learning_rate": 4.997252974089833e-05, + "loss": 0.0135, + "step": 7486, + "task_loss": 0.004255037754774094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998619445866537, + "compression_loss": 0.0, + "distillation_loss": 0.03041962906718254, + "epoch": 7.11, + "learning_rate": 4.997202810495551e-05, + "loss": 0.0277, + "step": 7487, + "task_loss": 0.0034227408468723297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998623856579278, + "compression_loss": 0.0, + "distillation_loss": 0.04478137940168381, + "epoch": 7.11, + "learning_rate": 4.997152193272353e-05, + "loss": 0.0427, + "step": 7488, + "task_loss": 0.024202093482017517 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998628257887518, + "compression_loss": 0.0, + "distillation_loss": 0.016431665048003197, + "epoch": 7.11, + "learning_rate": 4.9971011224294314e-05, + "loss": 0.0153, + "step": 7489, + "task_loss": 0.004669001325964928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998632649801293, + "compression_loss": 0.0, + "distillation_loss": 0.042685117572546005, + "epoch": 7.11, + "learning_rate": 4.997049597976066e-05, + "loss": 0.0468, + "step": 7490, + "task_loss": 0.0839599221944809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998637032330641, + "compression_loss": 0.0, + "distillation_loss": 0.038373369723558426, + "epoch": 7.11, + "learning_rate": 4.9969976199216144e-05, + "loss": 0.0551, + "step": 7491, + "task_loss": 0.2055097371339798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998641405485598, + "compression_loss": 0.0, + "distillation_loss": 0.01765899360179901, + "epoch": 7.11, + "learning_rate": 4.9969451882755196e-05, + "loss": 0.0163, + "step": 7492, + "task_loss": 0.0037982575595378876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998645769276201, + "compression_loss": 0.0, + "distillation_loss": 0.019907163456082344, + "epoch": 7.12, + "learning_rate": 4.996892303047306e-05, + "loss": 0.0281, + "step": 7493, + "task_loss": 0.10134420543909073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998650123712487, + "compression_loss": 0.0, + "distillation_loss": 0.02686948888003826, + "epoch": 7.12, + "learning_rate": 4.996838964246581e-05, + "loss": 0.0298, + "step": 7494, + "task_loss": 0.055808331817388535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998654468804494, + "compression_loss": 0.0, + "distillation_loss": 0.07542771100997925, + "epoch": 7.12, + "learning_rate": 4.996785171883032e-05, + "loss": 0.0714, + "step": 7495, + "task_loss": 0.034990422427654266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998658804562255, + "compression_loss": 0.0, + "distillation_loss": 0.026508131995797157, + "epoch": 7.12, + "learning_rate": 4.996730925966433e-05, + "loss": 0.0363, + "step": 7496, + "task_loss": 0.12431460618972778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998663130995811, + "compression_loss": 0.0, + "distillation_loss": 0.08525696396827698, + "epoch": 7.12, + "learning_rate": 4.996676226506636e-05, + "loss": 0.0831, + "step": 7497, + "task_loss": 0.06366419792175293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998667448115198, + "compression_loss": 0.0, + "distillation_loss": 0.02573692798614502, + "epoch": 7.12, + "learning_rate": 4.9966210735135785e-05, + "loss": 0.0237, + "step": 7498, + "task_loss": 0.005860496312379837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799867175593045, + "compression_loss": 0.0, + "distillation_loss": 0.10803981125354767, + "epoch": 7.12, + "learning_rate": 4.9965654669972794e-05, + "loss": 0.1114, + "step": 7499, + "task_loss": 0.14213261008262634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998676054451607, + "compression_loss": 0.0, + "distillation_loss": 0.018904566764831543, + "epoch": 7.12, + "learning_rate": 4.99650940696784e-05, + "loss": 0.0179, + "step": 7500, + "task_loss": 0.009208250790834427 + }, + { + "epoch": 7.12, + "eval_accuracy": 0.9036697247706422, + "eval_loss": 0.4249095320701599, + "eval_runtime": 18.0437, + "eval_samples_per_second": 48.327, + "eval_steps_per_second": 6.041, + "step": 7500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998680343688704, + "compression_loss": 0.0, + "distillation_loss": 0.1428883969783783, + "epoch": 7.12, + "learning_rate": 4.996452893435442e-05, + "loss": 0.1441, + "step": 7501, + "task_loss": 0.1551024615764618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799868462365178, + "compression_loss": 0.0, + "distillation_loss": 0.04754265025258064, + "epoch": 7.12, + "learning_rate": 4.9963959264103544e-05, + "loss": 0.0454, + "step": 7502, + "task_loss": 0.026247184723615646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998688894350868, + "compression_loss": 0.0, + "distillation_loss": 0.04800819978117943, + "epoch": 7.13, + "learning_rate": 4.996338505902924e-05, + "loss": 0.0622, + "step": 7503, + "task_loss": 0.18986263871192932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998693155796008, + "compression_loss": 0.0, + "distillation_loss": 0.08775762468576431, + "epoch": 7.13, + "learning_rate": 4.996280631923581e-05, + "loss": 0.0996, + "step": 7504, + "task_loss": 0.20661881566047668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998697407997237, + "compression_loss": 0.0, + "distillation_loss": 0.05795777961611748, + "epoch": 7.13, + "learning_rate": 4.9962223044828396e-05, + "loss": 0.0575, + "step": 7505, + "task_loss": 0.05344875901937485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998701650964589, + "compression_loss": 0.0, + "distillation_loss": 0.07826469838619232, + "epoch": 7.13, + "learning_rate": 4.9961635235912935e-05, + "loss": 0.0739, + "step": 7506, + "task_loss": 0.03454684466123581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998705884708104, + "compression_loss": 0.0, + "distillation_loss": 0.016553938388824463, + "epoch": 7.13, + "learning_rate": 4.9961042892596225e-05, + "loss": 0.0304, + "step": 7507, + "task_loss": 0.1547098606824875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998710109237817, + "compression_loss": 0.0, + "distillation_loss": 0.024404142051935196, + "epoch": 7.13, + "learning_rate": 4.996044601498586e-05, + "loss": 0.0231, + "step": 7508, + "task_loss": 0.010925725102424622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998714324563765, + "compression_loss": 0.0, + "distillation_loss": 0.12660256028175354, + "epoch": 7.13, + "learning_rate": 4.995984460319026e-05, + "loss": 0.1258, + "step": 7509, + "task_loss": 0.11870937794446945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998718530695985, + "compression_loss": 0.0, + "distillation_loss": 0.029197368770837784, + "epoch": 7.13, + "learning_rate": 4.995923865731869e-05, + "loss": 0.0291, + "step": 7510, + "task_loss": 0.028612298890948296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998722727644514, + "compression_loss": 0.0, + "distillation_loss": 0.01047599595040083, + "epoch": 7.13, + "learning_rate": 4.9958628177481195e-05, + "loss": 0.0097, + "step": 7511, + "task_loss": 0.002894926816225052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998726915419389, + "compression_loss": 0.0, + "distillation_loss": 0.13094131648540497, + "epoch": 7.13, + "learning_rate": 4.99580131637887e-05, + "loss": 0.1189, + "step": 7512, + "task_loss": 0.011026578024029732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998731094030646, + "compression_loss": 0.0, + "distillation_loss": 0.054517075419425964, + "epoch": 7.13, + "learning_rate": 4.995739361635292e-05, + "loss": 0.0499, + "step": 7513, + "task_loss": 0.008429093286395073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998735263488322, + "compression_loss": 0.0, + "distillation_loss": 0.057775288820266724, + "epoch": 7.14, + "learning_rate": 4.9956769535286385e-05, + "loss": 0.0621, + "step": 7514, + "task_loss": 0.1013016402721405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998739423802455, + "compression_loss": 0.0, + "distillation_loss": 0.13451939821243286, + "epoch": 7.14, + "learning_rate": 4.9956140920702476e-05, + "loss": 0.1393, + "step": 7515, + "task_loss": 0.1819526106119156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799874357498308, + "compression_loss": 0.0, + "distillation_loss": 0.039106979966163635, + "epoch": 7.14, + "learning_rate": 4.995550777271538e-05, + "loss": 0.0363, + "step": 7516, + "task_loss": 0.010946827009320259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998747717040235, + "compression_loss": 0.0, + "distillation_loss": 0.03019588068127632, + "epoch": 7.14, + "learning_rate": 4.995487009144011e-05, + "loss": 0.0277, + "step": 7517, + "task_loss": 0.005600154399871826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998751849983957, + "compression_loss": 0.0, + "distillation_loss": 0.1031942293047905, + "epoch": 7.14, + "learning_rate": 4.99542278769925e-05, + "loss": 0.1217, + "step": 7518, + "task_loss": 0.28847843408584595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998755973824282, + "compression_loss": 0.0, + "distillation_loss": 0.03711920231580734, + "epoch": 7.14, + "learning_rate": 4.995358112948921e-05, + "loss": 0.0388, + "step": 7519, + "task_loss": 0.053622905164957047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998760088571247, + "compression_loss": 0.0, + "distillation_loss": 0.036818671971559525, + "epoch": 7.14, + "learning_rate": 4.9952929849047734e-05, + "loss": 0.034, + "step": 7520, + "task_loss": 0.008642930537462234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998764194234891, + "compression_loss": 0.0, + "distillation_loss": 0.040213510394096375, + "epoch": 7.14, + "learning_rate": 4.9952274035786385e-05, + "loss": 0.0417, + "step": 7521, + "task_loss": 0.054693013429641724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998768290825247, + "compression_loss": 0.0, + "distillation_loss": 0.025412341579794884, + "epoch": 7.14, + "learning_rate": 4.9951613689824276e-05, + "loss": 0.0302, + "step": 7522, + "task_loss": 0.07287220656871796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998772378352353, + "compression_loss": 0.0, + "distillation_loss": 0.052676744759082794, + "epoch": 7.14, + "learning_rate": 4.995094881128138e-05, + "loss": 0.0828, + "step": 7523, + "task_loss": 0.3543989956378937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998776456826249, + "compression_loss": 0.0, + "distillation_loss": 0.04211168363690376, + "epoch": 7.15, + "learning_rate": 4.995027940027846e-05, + "loss": 0.0418, + "step": 7524, + "task_loss": 0.038644563406705856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998780526256968, + "compression_loss": 0.0, + "distillation_loss": 0.0226961188018322, + "epoch": 7.15, + "learning_rate": 4.9949605456937135e-05, + "loss": 0.0388, + "step": 7525, + "task_loss": 0.18341752886772156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998784586654548, + "compression_loss": 0.0, + "distillation_loss": 0.05215848237276077, + "epoch": 7.15, + "learning_rate": 4.994892698137981e-05, + "loss": 0.0506, + "step": 7526, + "task_loss": 0.03646836429834366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998788638029026, + "compression_loss": 0.0, + "distillation_loss": 0.022788241505622864, + "epoch": 7.15, + "learning_rate": 4.9948243973729745e-05, + "loss": 0.029, + "step": 7527, + "task_loss": 0.08468222618103027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998792680390439, + "compression_loss": 0.0, + "distillation_loss": 0.08569545298814774, + "epoch": 7.15, + "learning_rate": 4.994755643411101e-05, + "loss": 0.0933, + "step": 7528, + "task_loss": 0.16126233339309692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998796713748824, + "compression_loss": 0.0, + "distillation_loss": 0.019816741347312927, + "epoch": 7.15, + "learning_rate": 4.9946864362648506e-05, + "loss": 0.0305, + "step": 7529, + "task_loss": 0.1267254650592804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998800738114217, + "compression_loss": 0.0, + "distillation_loss": 0.17498815059661865, + "epoch": 7.15, + "learning_rate": 4.994616775946794e-05, + "loss": 0.1664, + "step": 7530, + "task_loss": 0.08893725275993347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998804753496654, + "compression_loss": 0.0, + "distillation_loss": 0.022720731794834137, + "epoch": 7.15, + "learning_rate": 4.994546662469586e-05, + "loss": 0.0207, + "step": 7531, + "task_loss": 0.002466036006808281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998808759906175, + "compression_loss": 0.0, + "distillation_loss": 0.045352548360824585, + "epoch": 7.15, + "learning_rate": 4.9944760958459624e-05, + "loss": 0.0428, + "step": 7532, + "task_loss": 0.020112009719014168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998812757352814, + "compression_loss": 0.0, + "distillation_loss": 0.059900932013988495, + "epoch": 7.15, + "learning_rate": 4.994405076088743e-05, + "loss": 0.063, + "step": 7533, + "task_loss": 0.0908510833978653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799881674584661, + "compression_loss": 0.0, + "distillation_loss": 0.06974376738071442, + "epoch": 7.15, + "learning_rate": 4.994333603210829e-05, + "loss": 0.0684, + "step": 7534, + "task_loss": 0.05587990581989288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998820725397597, + "compression_loss": 0.0, + "distillation_loss": 0.058896493166685104, + "epoch": 7.16, + "learning_rate": 4.9942616772252016e-05, + "loss": 0.0566, + "step": 7535, + "task_loss": 0.0360528826713562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998824696015814, + "compression_loss": 0.0, + "distillation_loss": 0.047747716307640076, + "epoch": 7.16, + "learning_rate": 4.994189298144929e-05, + "loss": 0.0435, + "step": 7536, + "task_loss": 0.005159799009561539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998828657711297, + "compression_loss": 0.0, + "distillation_loss": 0.019652074202895164, + "epoch": 7.16, + "learning_rate": 4.994116465983158e-05, + "loss": 0.0345, + "step": 7537, + "task_loss": 0.16844874620437622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998832610494083, + "compression_loss": 0.0, + "distillation_loss": 0.17571541666984558, + "epoch": 7.16, + "learning_rate": 4.99404318075312e-05, + "loss": 0.1646, + "step": 7538, + "task_loss": 0.06422274559736252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998836554374209, + "compression_loss": 0.0, + "distillation_loss": 0.1369340866804123, + "epoch": 7.16, + "learning_rate": 4.993969442468125e-05, + "loss": 0.1452, + "step": 7539, + "task_loss": 0.21913862228393555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998840489361712, + "compression_loss": 0.0, + "distillation_loss": 0.05932403728365898, + "epoch": 7.16, + "learning_rate": 4.993895251141571e-05, + "loss": 0.0689, + "step": 7540, + "task_loss": 0.15557356178760529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998844415466628, + "compression_loss": 0.0, + "distillation_loss": 0.04140976071357727, + "epoch": 7.16, + "learning_rate": 4.9938206067869334e-05, + "loss": 0.0454, + "step": 7541, + "task_loss": 0.0813070684671402 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998848332698995, + "compression_loss": 0.0, + "distillation_loss": 0.036964789032936096, + "epoch": 7.16, + "learning_rate": 4.993745509417772e-05, + "loss": 0.0396, + "step": 7542, + "task_loss": 0.06300060451030731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998852241068848, + "compression_loss": 0.0, + "distillation_loss": 0.14604540169239044, + "epoch": 7.16, + "learning_rate": 4.9936699590477296e-05, + "loss": 0.1421, + "step": 7543, + "task_loss": 0.10691849142313004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998856140586226, + "compression_loss": 0.0, + "distillation_loss": 0.10992075502872467, + "epoch": 7.16, + "learning_rate": 4.9935939556905295e-05, + "loss": 0.1188, + "step": 7544, + "task_loss": 0.19906115531921387 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998860031261164, + "compression_loss": 0.0, + "distillation_loss": 0.014540485106408596, + "epoch": 7.17, + "learning_rate": 4.993517499359978e-05, + "loss": 0.0135, + "step": 7545, + "task_loss": 0.003651769831776619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79988639131037, + "compression_loss": 0.0, + "distillation_loss": 0.06785355508327484, + "epoch": 7.17, + "learning_rate": 4.993440590069963e-05, + "loss": 0.0756, + "step": 7546, + "task_loss": 0.1453818380832672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799886778612387, + "compression_loss": 0.0, + "distillation_loss": 0.08633054792881012, + "epoch": 7.17, + "learning_rate": 4.993363227834457e-05, + "loss": 0.0958, + "step": 7547, + "task_loss": 0.18063369393348694 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998871650331711, + "compression_loss": 0.0, + "distillation_loss": 0.13422691822052002, + "epoch": 7.17, + "learning_rate": 4.9932854126675124e-05, + "loss": 0.1375, + "step": 7548, + "task_loss": 0.16670459508895874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998875505737261, + "compression_loss": 0.0, + "distillation_loss": 0.0358545184135437, + "epoch": 7.17, + "learning_rate": 4.993207144583264e-05, + "loss": 0.0333, + "step": 7549, + "task_loss": 0.009983271360397339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998879352350555, + "compression_loss": 0.0, + "distillation_loss": 0.03655195236206055, + "epoch": 7.17, + "learning_rate": 4.993128423595931e-05, + "loss": 0.0333, + "step": 7550, + "task_loss": 0.0044655874371528625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799888319018163, + "compression_loss": 0.0, + "distillation_loss": 0.04761989042162895, + "epoch": 7.17, + "learning_rate": 4.9930492497198125e-05, + "loss": 0.0541, + "step": 7551, + "task_loss": 0.11243405193090439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998887019240526, + "compression_loss": 0.0, + "distillation_loss": 0.04824792221188545, + "epoch": 7.17, + "learning_rate": 4.992969622969292e-05, + "loss": 0.0455, + "step": 7552, + "task_loss": 0.020604906603693962 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998890839537275, + "compression_loss": 0.0, + "distillation_loss": 0.02487030252814293, + "epoch": 7.17, + "learning_rate": 4.992889543358832e-05, + "loss": 0.0281, + "step": 7553, + "task_loss": 0.05736660212278366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998894651081917, + "compression_loss": 0.0, + "distillation_loss": 0.040249839425086975, + "epoch": 7.17, + "learning_rate": 4.9928090109029817e-05, + "loss": 0.0528, + "step": 7554, + "task_loss": 0.16572295129299164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998898453884489, + "compression_loss": 0.0, + "distillation_loss": 0.08049503713846207, + "epoch": 7.17, + "learning_rate": 4.9927280256163686e-05, + "loss": 0.0842, + "step": 7555, + "task_loss": 0.11797265708446503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998902247955025, + "compression_loss": 0.0, + "distillation_loss": 0.0269448421895504, + "epoch": 7.18, + "learning_rate": 4.992646587513705e-05, + "loss": 0.0313, + "step": 7556, + "task_loss": 0.07026375085115433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998906033303564, + "compression_loss": 0.0, + "distillation_loss": 0.03366638720035553, + "epoch": 7.18, + "learning_rate": 4.9925646966097835e-05, + "loss": 0.0313, + "step": 7557, + "task_loss": 0.010441986843943596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998909809940143, + "compression_loss": 0.0, + "distillation_loss": 0.015848444774746895, + "epoch": 7.18, + "learning_rate": 4.99248235291948e-05, + "loss": 0.0309, + "step": 7558, + "task_loss": 0.16670146584510803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998913577874799, + "compression_loss": 0.0, + "distillation_loss": 0.05733553692698479, + "epoch": 7.18, + "learning_rate": 4.9923995564577544e-05, + "loss": 0.0547, + "step": 7559, + "task_loss": 0.03086872212588787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998917337117567, + "compression_loss": 0.0, + "distillation_loss": 0.03503969684243202, + "epoch": 7.18, + "learning_rate": 4.992316307239645e-05, + "loss": 0.0492, + "step": 7560, + "task_loss": 0.17629651725292206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998921087678484, + "compression_loss": 0.0, + "distillation_loss": 0.07000732421875, + "epoch": 7.18, + "learning_rate": 4.992232605280276e-05, + "loss": 0.078, + "step": 7561, + "task_loss": 0.14994311332702637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799892482956759, + "compression_loss": 0.0, + "distillation_loss": 0.042889855802059174, + "epoch": 7.18, + "learning_rate": 4.992148450594851e-05, + "loss": 0.039, + "step": 7562, + "task_loss": 0.0036024488508701324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998928562794918, + "compression_loss": 0.0, + "distillation_loss": 0.012830915860831738, + "epoch": 7.18, + "learning_rate": 4.9920638431986574e-05, + "loss": 0.0119, + "step": 7563, + "task_loss": 0.0039144158363342285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998932287370506, + "compression_loss": 0.0, + "distillation_loss": 0.017170187085866928, + "epoch": 7.18, + "learning_rate": 4.991978783107065e-05, + "loss": 0.0199, + "step": 7564, + "task_loss": 0.04400904104113579 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998936003304392, + "compression_loss": 0.0, + "distillation_loss": 0.019753381609916687, + "epoch": 7.18, + "learning_rate": 4.9918932703355256e-05, + "loss": 0.0186, + "step": 7565, + "task_loss": 0.008108856156468391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998939710606612, + "compression_loss": 0.0, + "distillation_loss": 0.11725122481584549, + "epoch": 7.19, + "learning_rate": 4.991807304899572e-05, + "loss": 0.1261, + "step": 7566, + "task_loss": 0.20566239953041077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998943409287202, + "compression_loss": 0.0, + "distillation_loss": 0.06889253109693527, + "epoch": 7.19, + "learning_rate": 4.991720886814821e-05, + "loss": 0.0706, + "step": 7567, + "task_loss": 0.08627396821975708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998947099356201, + "compression_loss": 0.0, + "distillation_loss": 0.13849744200706482, + "epoch": 7.19, + "learning_rate": 4.99163401609697e-05, + "loss": 0.1404, + "step": 7568, + "task_loss": 0.15727216005325317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998950780823644, + "compression_loss": 0.0, + "distillation_loss": 0.0490923747420311, + "epoch": 7.19, + "learning_rate": 4.991546692761801e-05, + "loss": 0.047, + "step": 7569, + "task_loss": 0.028280524536967278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998954453699567, + "compression_loss": 0.0, + "distillation_loss": 0.0508795864880085, + "epoch": 7.19, + "learning_rate": 4.991458916825176e-05, + "loss": 0.0486, + "step": 7570, + "task_loss": 0.027954697608947754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799895811799401, + "compression_loss": 0.0, + "distillation_loss": 0.023934636265039444, + "epoch": 7.19, + "learning_rate": 4.991370688303039e-05, + "loss": 0.0246, + "step": 7571, + "task_loss": 0.030092671513557434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998961773717007, + "compression_loss": 0.0, + "distillation_loss": 0.016041746363043785, + "epoch": 7.19, + "learning_rate": 4.9912820072114185e-05, + "loss": 0.0149, + "step": 7572, + "task_loss": 0.0041885413229465485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998965420878595, + "compression_loss": 0.0, + "distillation_loss": 0.051787007600069046, + "epoch": 7.19, + "learning_rate": 4.9911928735664224e-05, + "loss": 0.0531, + "step": 7573, + "task_loss": 0.0652824267745018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998969059488813, + "compression_loss": 0.0, + "distillation_loss": 0.11679676175117493, + "epoch": 7.19, + "learning_rate": 4.991103287384244e-05, + "loss": 0.1229, + "step": 7574, + "task_loss": 0.1777617633342743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998972689557696, + "compression_loss": 0.0, + "distillation_loss": 0.02287183701992035, + "epoch": 7.19, + "learning_rate": 4.9910132486811555e-05, + "loss": 0.0216, + "step": 7575, + "task_loss": 0.009705502539873123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799897631109528, + "compression_loss": 0.0, + "distillation_loss": 0.07535935938358307, + "epoch": 7.19, + "learning_rate": 4.990922757473514e-05, + "loss": 0.0712, + "step": 7576, + "task_loss": 0.034015409648418427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998979924111604, + "compression_loss": 0.0, + "distillation_loss": 0.03653284162282944, + "epoch": 7.2, + "learning_rate": 4.990831813777757e-05, + "loss": 0.0336, + "step": 7577, + "task_loss": 0.006952434778213501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998983528616704, + "compression_loss": 0.0, + "distillation_loss": 0.06351464986801147, + "epoch": 7.2, + "learning_rate": 4.990740417610406e-05, + "loss": 0.0599, + "step": 7578, + "task_loss": 0.02752096578478813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998987124620617, + "compression_loss": 0.0, + "distillation_loss": 0.019939813762903214, + "epoch": 7.2, + "learning_rate": 4.9906485689880613e-05, + "loss": 0.0185, + "step": 7579, + "task_loss": 0.005348971113562584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998990712133379, + "compression_loss": 0.0, + "distillation_loss": 0.014425119385123253, + "epoch": 7.2, + "learning_rate": 4.9905562679274096e-05, + "loss": 0.0141, + "step": 7580, + "task_loss": 0.011267106980085373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7998994291165028, + "compression_loss": 0.0, + "distillation_loss": 0.1040351465344429, + "epoch": 7.2, + "learning_rate": 4.9904635144452164e-05, + "loss": 0.1014, + "step": 7581, + "task_loss": 0.07765233516693115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79989978617256, + "compression_loss": 0.0, + "distillation_loss": 0.049760229885578156, + "epoch": 7.2, + "learning_rate": 4.990370308558332e-05, + "loss": 0.0527, + "step": 7582, + "task_loss": 0.07866282761096954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999001423825131, + "compression_loss": 0.0, + "distillation_loss": 0.014256338588893414, + "epoch": 7.2, + "learning_rate": 4.9902766502836874e-05, + "loss": 0.0235, + "step": 7583, + "task_loss": 0.10717885941267014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999004977473658, + "compression_loss": 0.0, + "distillation_loss": 0.084557443857193, + "epoch": 7.2, + "learning_rate": 4.9901825396382965e-05, + "loss": 0.0891, + "step": 7584, + "task_loss": 0.13022615015506744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799900852268122, + "compression_loss": 0.0, + "distillation_loss": 0.1039082333445549, + "epoch": 7.2, + "learning_rate": 4.990087976639254e-05, + "loss": 0.1037, + "step": 7585, + "task_loss": 0.10136047005653381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999012059457853, + "compression_loss": 0.0, + "distillation_loss": 0.08127206563949585, + "epoch": 7.2, + "learning_rate": 4.989992961303738e-05, + "loss": 0.0907, + "step": 7586, + "task_loss": 0.17568367719650269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999015587813593, + "compression_loss": 0.0, + "distillation_loss": 0.012685288675129414, + "epoch": 7.21, + "learning_rate": 4.989897493649008e-05, + "loss": 0.0195, + "step": 7587, + "task_loss": 0.08097560703754425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999019107758476, + "compression_loss": 0.0, + "distillation_loss": 0.03525643050670624, + "epoch": 7.21, + "learning_rate": 4.989801573692408e-05, + "loss": 0.0407, + "step": 7588, + "task_loss": 0.08939573168754578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799902261930254, + "compression_loss": 0.0, + "distillation_loss": 0.04309917986392975, + "epoch": 7.21, + "learning_rate": 4.989705201451361e-05, + "loss": 0.0391, + "step": 7589, + "task_loss": 0.00266990065574646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999026122455822, + "compression_loss": 0.0, + "distillation_loss": 0.018025081604719162, + "epoch": 7.21, + "learning_rate": 4.989608376943373e-05, + "loss": 0.0172, + "step": 7590, + "task_loss": 0.009299803525209427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999029617228359, + "compression_loss": 0.0, + "distillation_loss": 0.011557912454009056, + "epoch": 7.21, + "learning_rate": 4.9895111001860335e-05, + "loss": 0.0108, + "step": 7591, + "task_loss": 0.004404161125421524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999033103630186, + "compression_loss": 0.0, + "distillation_loss": 0.021339278668165207, + "epoch": 7.21, + "learning_rate": 4.989413371197013e-05, + "loss": 0.0201, + "step": 7592, + "task_loss": 0.008754091337323189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999036581671343, + "compression_loss": 0.0, + "distillation_loss": 0.08135688304901123, + "epoch": 7.21, + "learning_rate": 4.989315189994065e-05, + "loss": 0.087, + "step": 7593, + "task_loss": 0.13778801262378693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999040051361864, + "compression_loss": 0.0, + "distillation_loss": 0.020718682557344437, + "epoch": 7.21, + "learning_rate": 4.9892165565950235e-05, + "loss": 0.0198, + "step": 7594, + "task_loss": 0.011542653664946556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999043512711788, + "compression_loss": 0.0, + "distillation_loss": 0.029304485768079758, + "epoch": 7.21, + "learning_rate": 4.9891174710178054e-05, + "loss": 0.0359, + "step": 7595, + "task_loss": 0.09544922411441803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999046965731149, + "compression_loss": 0.0, + "distillation_loss": 0.012081381864845753, + "epoch": 7.21, + "learning_rate": 4.9890179332804125e-05, + "loss": 0.0112, + "step": 7596, + "task_loss": 0.003652777522802353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999050410429985, + "compression_loss": 0.0, + "distillation_loss": 0.032954856753349304, + "epoch": 7.21, + "learning_rate": 4.988917943400924e-05, + "loss": 0.032, + "step": 7597, + "task_loss": 0.023814279586076736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999053846818335, + "compression_loss": 0.0, + "distillation_loss": 0.019010456278920174, + "epoch": 7.22, + "learning_rate": 4.988817501397505e-05, + "loss": 0.0209, + "step": 7598, + "task_loss": 0.03756894916296005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999057274906234, + "compression_loss": 0.0, + "distillation_loss": 0.05136597529053688, + "epoch": 7.22, + "learning_rate": 4.9887166072884e-05, + "loss": 0.0477, + "step": 7599, + "task_loss": 0.014553211629390717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999060694703718, + "compression_loss": 0.0, + "distillation_loss": 0.02315210923552513, + "epoch": 7.22, + "learning_rate": 4.988615261091938e-05, + "loss": 0.0213, + "step": 7600, + "task_loss": 0.004539225250482559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999064106220825, + "compression_loss": 0.0, + "distillation_loss": 0.05613788589835167, + "epoch": 7.22, + "learning_rate": 4.9885134628265276e-05, + "loss": 0.0686, + "step": 7601, + "task_loss": 0.1807435154914856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999067509467591, + "compression_loss": 0.0, + "distillation_loss": 0.017415829002857208, + "epoch": 7.22, + "learning_rate": 4.988411212510663e-05, + "loss": 0.0263, + "step": 7602, + "task_loss": 0.10614414513111115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999070904454054, + "compression_loss": 0.0, + "distillation_loss": 0.02971852757036686, + "epoch": 7.22, + "learning_rate": 4.988308510162917e-05, + "loss": 0.0275, + "step": 7603, + "task_loss": 0.007290884852409363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799907429119025, + "compression_loss": 0.0, + "distillation_loss": 0.09431088715791702, + "epoch": 7.22, + "learning_rate": 4.988205355801945e-05, + "loss": 0.0992, + "step": 7604, + "task_loss": 0.14289163053035736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999077669686216, + "compression_loss": 0.0, + "distillation_loss": 0.11817564815282822, + "epoch": 7.22, + "learning_rate": 4.988101749446488e-05, + "loss": 0.1131, + "step": 7605, + "task_loss": 0.06737690418958664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799908103995199, + "compression_loss": 0.0, + "distillation_loss": 0.05987474322319031, + "epoch": 7.22, + "learning_rate": 4.987997691115366e-05, + "loss": 0.0599, + "step": 7606, + "task_loss": 0.060150373727083206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999084401997606, + "compression_loss": 0.0, + "distillation_loss": 0.1141742616891861, + "epoch": 7.22, + "learning_rate": 4.98789318082748e-05, + "loss": 0.1098, + "step": 7607, + "task_loss": 0.07077798992395401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999087755833103, + "compression_loss": 0.0, + "distillation_loss": 0.024525757879018784, + "epoch": 7.23, + "learning_rate": 4.987788218601816e-05, + "loss": 0.0302, + "step": 7608, + "task_loss": 0.08100198209285736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999091101468517, + "compression_loss": 0.0, + "distillation_loss": 0.040195684880018234, + "epoch": 7.23, + "learning_rate": 4.987682804457441e-05, + "loss": 0.0451, + "step": 7609, + "task_loss": 0.08934106677770615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999094438913886, + "compression_loss": 0.0, + "distillation_loss": 0.044671397656202316, + "epoch": 7.23, + "learning_rate": 4.987576938413504e-05, + "loss": 0.0412, + "step": 7610, + "task_loss": 0.009734295308589935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999097768179245, + "compression_loss": 0.0, + "distillation_loss": 0.017387911677360535, + "epoch": 7.23, + "learning_rate": 4.987470620489235e-05, + "loss": 0.0164, + "step": 7611, + "task_loss": 0.007594836875796318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999101089274633, + "compression_loss": 0.0, + "distillation_loss": 0.04098183289170265, + "epoch": 7.23, + "learning_rate": 4.9873638507039486e-05, + "loss": 0.0408, + "step": 7612, + "task_loss": 0.039046209305524826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999104402210084, + "compression_loss": 0.0, + "distillation_loss": 0.0328001007437706, + "epoch": 7.23, + "learning_rate": 4.987256629077039e-05, + "loss": 0.0379, + "step": 7613, + "task_loss": 0.08381561934947968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999107706995638, + "compression_loss": 0.0, + "distillation_loss": 0.06902207434177399, + "epoch": 7.23, + "learning_rate": 4.987148955627985e-05, + "loss": 0.0674, + "step": 7614, + "task_loss": 0.05302996188402176 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999111003641329, + "compression_loss": 0.0, + "distillation_loss": 0.015985846519470215, + "epoch": 7.23, + "learning_rate": 4.987040830376344e-05, + "loss": 0.0202, + "step": 7615, + "task_loss": 0.05846152827143669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999114292157196, + "compression_loss": 0.0, + "distillation_loss": 0.04634019732475281, + "epoch": 7.23, + "learning_rate": 4.9869322533417596e-05, + "loss": 0.0425, + "step": 7616, + "task_loss": 0.008089037612080574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999117572553275, + "compression_loss": 0.0, + "distillation_loss": 0.0537746362388134, + "epoch": 7.23, + "learning_rate": 4.9868232245439525e-05, + "loss": 0.0524, + "step": 7617, + "task_loss": 0.039632245898246765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999120844839601, + "compression_loss": 0.0, + "distillation_loss": 0.05178552865982056, + "epoch": 7.23, + "learning_rate": 4.986713744002731e-05, + "loss": 0.0538, + "step": 7618, + "task_loss": 0.07191041857004166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999124109026214, + "compression_loss": 0.0, + "distillation_loss": 0.025834370404481888, + "epoch": 7.24, + "learning_rate": 4.9866038117379824e-05, + "loss": 0.0236, + "step": 7619, + "task_loss": 0.003722256049513817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999127365123149, + "compression_loss": 0.0, + "distillation_loss": 0.19725723564624786, + "epoch": 7.24, + "learning_rate": 4.986493427769675e-05, + "loss": 0.1893, + "step": 7620, + "task_loss": 0.11761374771595001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999130613140443, + "compression_loss": 0.0, + "distillation_loss": 0.05075707659125328, + "epoch": 7.24, + "learning_rate": 4.986382592117861e-05, + "loss": 0.0515, + "step": 7621, + "task_loss": 0.05864773318171501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999133853088133, + "compression_loss": 0.0, + "distillation_loss": 0.021583644673228264, + "epoch": 7.24, + "learning_rate": 4.986271304802675e-05, + "loss": 0.0223, + "step": 7622, + "task_loss": 0.028752833604812622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999137084976256, + "compression_loss": 0.0, + "distillation_loss": 0.12262220680713654, + "epoch": 7.24, + "learning_rate": 4.986159565844333e-05, + "loss": 0.1209, + "step": 7623, + "task_loss": 0.10493774712085724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999140308814848, + "compression_loss": 0.0, + "distillation_loss": 0.054139602929353714, + "epoch": 7.24, + "learning_rate": 4.986047375263131e-05, + "loss": 0.0749, + "step": 7624, + "task_loss": 0.2614555060863495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999143524613948, + "compression_loss": 0.0, + "distillation_loss": 0.011302494443953037, + "epoch": 7.24, + "learning_rate": 4.9859347330794515e-05, + "loss": 0.0164, + "step": 7625, + "task_loss": 0.0621810182929039 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799914673238359, + "compression_loss": 0.0, + "distillation_loss": 0.019549107179045677, + "epoch": 7.24, + "learning_rate": 4.985821639313755e-05, + "loss": 0.0179, + "step": 7626, + "task_loss": 0.003164045512676239 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999149932133811, + "compression_loss": 0.0, + "distillation_loss": 0.07022275030612946, + "epoch": 7.24, + "learning_rate": 4.985708093986586e-05, + "loss": 0.0732, + "step": 7627, + "task_loss": 0.09962170571088791 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799915312387465, + "compression_loss": 0.0, + "distillation_loss": 0.042909909039735794, + "epoch": 7.24, + "learning_rate": 4.98559409711857e-05, + "loss": 0.0394, + "step": 7628, + "task_loss": 0.007330624386668205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999156307616143, + "compression_loss": 0.0, + "distillation_loss": 0.043095022439956665, + "epoch": 7.25, + "learning_rate": 4.985479648730416e-05, + "loss": 0.0452, + "step": 7629, + "task_loss": 0.06463564932346344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999159483368327, + "compression_loss": 0.0, + "distillation_loss": 0.05689408257603645, + "epoch": 7.25, + "learning_rate": 4.985364748842914e-05, + "loss": 0.0657, + "step": 7630, + "task_loss": 0.14448405802249908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999162651141237, + "compression_loss": 0.0, + "distillation_loss": 0.09874869883060455, + "epoch": 7.25, + "learning_rate": 4.985249397476934e-05, + "loss": 0.103, + "step": 7631, + "task_loss": 0.14085200428962708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999165810944913, + "compression_loss": 0.0, + "distillation_loss": 0.10595101863145828, + "epoch": 7.25, + "learning_rate": 4.985133594653434e-05, + "loss": 0.1023, + "step": 7632, + "task_loss": 0.06989569962024689 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999168962789388, + "compression_loss": 0.0, + "distillation_loss": 0.11988788843154907, + "epoch": 7.25, + "learning_rate": 4.9850173403934466e-05, + "loss": 0.1167, + "step": 7633, + "task_loss": 0.08802229911088943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999172106684702, + "compression_loss": 0.0, + "distillation_loss": 0.026174481958150864, + "epoch": 7.25, + "learning_rate": 4.9849006347180915e-05, + "loss": 0.0297, + "step": 7634, + "task_loss": 0.06151473894715309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799917524264089, + "compression_loss": 0.0, + "distillation_loss": 0.02765459194779396, + "epoch": 7.25, + "learning_rate": 4.9847834776485694e-05, + "loss": 0.0379, + "step": 7635, + "task_loss": 0.12964758276939392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999178370667991, + "compression_loss": 0.0, + "distillation_loss": 0.02943515032529831, + "epoch": 7.25, + "learning_rate": 4.984665869206161e-05, + "loss": 0.0346, + "step": 7636, + "task_loss": 0.08072115480899811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999181490776038, + "compression_loss": 0.0, + "distillation_loss": 0.09545106440782547, + "epoch": 7.25, + "learning_rate": 4.984547809412231e-05, + "loss": 0.0933, + "step": 7637, + "task_loss": 0.07358794659376144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999184602975071, + "compression_loss": 0.0, + "distillation_loss": 0.030496662482619286, + "epoch": 7.25, + "learning_rate": 4.984429298288227e-05, + "loss": 0.0326, + "step": 7638, + "task_loss": 0.051075611263513565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999187707275127, + "compression_loss": 0.0, + "distillation_loss": 0.07517765462398529, + "epoch": 7.25, + "learning_rate": 4.984310335855674e-05, + "loss": 0.0973, + "step": 7639, + "task_loss": 0.29597049951553345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799919080368624, + "compression_loss": 0.0, + "distillation_loss": 0.06372272223234177, + "epoch": 7.26, + "learning_rate": 4.9841909221361855e-05, + "loss": 0.0631, + "step": 7640, + "task_loss": 0.05780046060681343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799919389221845, + "compression_loss": 0.0, + "distillation_loss": 0.041298843920230865, + "epoch": 7.26, + "learning_rate": 4.9840710571514515e-05, + "loss": 0.0489, + "step": 7641, + "task_loss": 0.1176719143986702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999196972881792, + "compression_loss": 0.0, + "distillation_loss": 0.027051087468862534, + "epoch": 7.26, + "learning_rate": 4.9839507409232464e-05, + "loss": 0.0249, + "step": 7642, + "task_loss": 0.005155773833394051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999200045686303, + "compression_loss": 0.0, + "distillation_loss": 0.08592119812965393, + "epoch": 7.26, + "learning_rate": 4.983829973473426e-05, + "loss": 0.0866, + "step": 7643, + "task_loss": 0.09253047406673431 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799920311064202, + "compression_loss": 0.0, + "distillation_loss": 0.04047606140375137, + "epoch": 7.26, + "learning_rate": 4.983708754823929e-05, + "loss": 0.0374, + "step": 7644, + "task_loss": 0.009331891313195229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799920616775898, + "compression_loss": 0.0, + "distillation_loss": 0.058635760098695755, + "epoch": 7.26, + "learning_rate": 4.983587084996776e-05, + "loss": 0.0624, + "step": 7645, + "task_loss": 0.09588063508272171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799920921704722, + "compression_loss": 0.0, + "distillation_loss": 0.20887014269828796, + "epoch": 7.26, + "learning_rate": 4.9834649640140664e-05, + "loss": 0.1998, + "step": 7646, + "task_loss": 0.11826451122760773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999212258516776, + "compression_loss": 0.0, + "distillation_loss": 0.0295681431889534, + "epoch": 7.26, + "learning_rate": 4.9833423918979864e-05, + "loss": 0.0274, + "step": 7647, + "task_loss": 0.00763612799346447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999215292177686, + "compression_loss": 0.0, + "distillation_loss": 0.03998691588640213, + "epoch": 7.26, + "learning_rate": 4.983219368670801e-05, + "loss": 0.0477, + "step": 7648, + "task_loss": 0.117280974984169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999218318039986, + "compression_loss": 0.0, + "distillation_loss": 0.04406122863292694, + "epoch": 7.26, + "learning_rate": 4.983095894354858e-05, + "loss": 0.0483, + "step": 7649, + "task_loss": 0.08681491017341614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999221336113712, + "compression_loss": 0.0, + "distillation_loss": 0.02541101723909378, + "epoch": 7.26, + "learning_rate": 4.9829719689725865e-05, + "loss": 0.0278, + "step": 7650, + "task_loss": 0.04967557266354561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999224346408902, + "compression_loss": 0.0, + "distillation_loss": 0.01671786792576313, + "epoch": 7.27, + "learning_rate": 4.982847592546499e-05, + "loss": 0.0154, + "step": 7651, + "task_loss": 0.0038224849849939346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999227348935593, + "compression_loss": 0.0, + "distillation_loss": 0.03665540739893913, + "epoch": 7.27, + "learning_rate": 4.982722765099189e-05, + "loss": 0.0429, + "step": 7652, + "task_loss": 0.09867965430021286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999230343703821, + "compression_loss": 0.0, + "distillation_loss": 0.044541992247104645, + "epoch": 7.27, + "learning_rate": 4.982597486653332e-05, + "loss": 0.0519, + "step": 7653, + "task_loss": 0.11796452105045319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999233330723624, + "compression_loss": 0.0, + "distillation_loss": 0.030439257621765137, + "epoch": 7.27, + "learning_rate": 4.982471757231685e-05, + "loss": 0.0352, + "step": 7654, + "task_loss": 0.07802172005176544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999236310005038, + "compression_loss": 0.0, + "distillation_loss": 0.0167954470962286, + "epoch": 7.27, + "learning_rate": 4.982345576857087e-05, + "loss": 0.0169, + "step": 7655, + "task_loss": 0.017974717542529106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999239281558099, + "compression_loss": 0.0, + "distillation_loss": 0.07342584431171417, + "epoch": 7.27, + "learning_rate": 4.9822189455524604e-05, + "loss": 0.0754, + "step": 7656, + "task_loss": 0.09314364194869995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999242245392846, + "compression_loss": 0.0, + "distillation_loss": 0.1788443773984909, + "epoch": 7.27, + "learning_rate": 4.982091863340808e-05, + "loss": 0.1738, + "step": 7657, + "task_loss": 0.12857407331466675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999245201519313, + "compression_loss": 0.0, + "distillation_loss": 0.12496727705001831, + "epoch": 7.27, + "learning_rate": 4.9819643302452146e-05, + "loss": 0.1192, + "step": 7658, + "task_loss": 0.06772095710039139 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999248149947539, + "compression_loss": 0.0, + "distillation_loss": 0.03145987540483475, + "epoch": 7.27, + "learning_rate": 4.981836346288847e-05, + "loss": 0.029, + "step": 7659, + "task_loss": 0.00681840255856514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799925109068756, + "compression_loss": 0.0, + "distillation_loss": 0.010385725647211075, + "epoch": 7.27, + "learning_rate": 4.981707911494955e-05, + "loss": 0.0098, + "step": 7660, + "task_loss": 0.004181254655122757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999254023749414, + "compression_loss": 0.0, + "distillation_loss": 0.011662168428301811, + "epoch": 7.28, + "learning_rate": 4.981579025886868e-05, + "loss": 0.0109, + "step": 7661, + "task_loss": 0.0035572052001953125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999256949143135, + "compression_loss": 0.0, + "distillation_loss": 0.02936125546693802, + "epoch": 7.28, + "learning_rate": 4.981449689488e-05, + "loss": 0.0453, + "step": 7662, + "task_loss": 0.18874679505825043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999259866878764, + "compression_loss": 0.0, + "distillation_loss": 0.07481378316879272, + "epoch": 7.28, + "learning_rate": 4.981319902321846e-05, + "loss": 0.072, + "step": 7663, + "task_loss": 0.04644451290369034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999262776966334, + "compression_loss": 0.0, + "distillation_loss": 0.10028335452079773, + "epoch": 7.28, + "learning_rate": 4.981189664411981e-05, + "loss": 0.1078, + "step": 7664, + "task_loss": 0.17516379058361053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999265679415883, + "compression_loss": 0.0, + "distillation_loss": 0.02721228078007698, + "epoch": 7.28, + "learning_rate": 4.981058975782063e-05, + "loss": 0.034, + "step": 7665, + "task_loss": 0.09542686492204666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999268574237448, + "compression_loss": 0.0, + "distillation_loss": 0.035428885370492935, + "epoch": 7.28, + "learning_rate": 4.9809278364558336e-05, + "loss": 0.0378, + "step": 7666, + "task_loss": 0.05906771123409271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999271461441066, + "compression_loss": 0.0, + "distillation_loss": 0.04637853801250458, + "epoch": 7.28, + "learning_rate": 4.980796246457115e-05, + "loss": 0.0482, + "step": 7667, + "task_loss": 0.06469672918319702 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999274341036775, + "compression_loss": 0.0, + "distillation_loss": 0.060008976608514786, + "epoch": 7.28, + "learning_rate": 4.9806642058098105e-05, + "loss": 0.0557, + "step": 7668, + "task_loss": 0.016873404383659363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799927721303461, + "compression_loss": 0.0, + "distillation_loss": 0.02527848817408085, + "epoch": 7.28, + "learning_rate": 4.980531714537905e-05, + "loss": 0.0239, + "step": 7669, + "task_loss": 0.011759728193283081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999280077444608, + "compression_loss": 0.0, + "distillation_loss": 0.021292556077241898, + "epoch": 7.28, + "learning_rate": 4.980398772665468e-05, + "loss": 0.0197, + "step": 7670, + "task_loss": 0.005023036152124405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999282934276807, + "compression_loss": 0.0, + "distillation_loss": 0.028033584356307983, + "epoch": 7.28, + "learning_rate": 4.980265380216649e-05, + "loss": 0.0286, + "step": 7671, + "task_loss": 0.03369821235537529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999285783541241, + "compression_loss": 0.0, + "distillation_loss": 0.028027424588799477, + "epoch": 7.29, + "learning_rate": 4.9801315372156775e-05, + "loss": 0.0488, + "step": 7672, + "task_loss": 0.23589852452278137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999288625247951, + "compression_loss": 0.0, + "distillation_loss": 0.048800766468048096, + "epoch": 7.29, + "learning_rate": 4.979997243686868e-05, + "loss": 0.0462, + "step": 7673, + "task_loss": 0.022747982293367386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999291459406971, + "compression_loss": 0.0, + "distillation_loss": 0.040504857897758484, + "epoch": 7.29, + "learning_rate": 4.979862499654615e-05, + "loss": 0.0495, + "step": 7674, + "task_loss": 0.13015000522136688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999294286028338, + "compression_loss": 0.0, + "distillation_loss": 0.017775043845176697, + "epoch": 7.29, + "learning_rate": 4.9797273051433966e-05, + "loss": 0.0164, + "step": 7675, + "task_loss": 0.004127055406570435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799929710512209, + "compression_loss": 0.0, + "distillation_loss": 0.13806067407131195, + "epoch": 7.29, + "learning_rate": 4.97959166017777e-05, + "loss": 0.1467, + "step": 7676, + "task_loss": 0.2246706783771515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999299916698263, + "compression_loss": 0.0, + "distillation_loss": 0.05745643004775047, + "epoch": 7.29, + "learning_rate": 4.979455564782377e-05, + "loss": 0.0535, + "step": 7677, + "task_loss": 0.017686685547232628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999302720766893, + "compression_loss": 0.0, + "distillation_loss": 0.07123734056949615, + "epoch": 7.29, + "learning_rate": 4.9793190189819395e-05, + "loss": 0.0862, + "step": 7678, + "task_loss": 0.22124677896499634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999305517338018, + "compression_loss": 0.0, + "distillation_loss": 0.01819181814789772, + "epoch": 7.29, + "learning_rate": 4.979182022801262e-05, + "loss": 0.0174, + "step": 7679, + "task_loss": 0.01074174977838993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999308306421675, + "compression_loss": 0.0, + "distillation_loss": 0.045470044016838074, + "epoch": 7.29, + "learning_rate": 4.979044576265229e-05, + "loss": 0.0435, + "step": 7680, + "task_loss": 0.025997349992394447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999311088027901, + "compression_loss": 0.0, + "distillation_loss": 0.027925997972488403, + "epoch": 7.29, + "learning_rate": 4.9789066793988106e-05, + "loss": 0.0256, + "step": 7681, + "task_loss": 0.004869425669312477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999313862166731, + "compression_loss": 0.0, + "distillation_loss": 0.1813095510005951, + "epoch": 7.3, + "learning_rate": 4.978768332227054e-05, + "loss": 0.1863, + "step": 7682, + "task_loss": 0.23134742677211761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999316628848204, + "compression_loss": 0.0, + "distillation_loss": 0.015448532067239285, + "epoch": 7.3, + "learning_rate": 4.9786295347750936e-05, + "loss": 0.0143, + "step": 7683, + "task_loss": 0.00408821739256382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999319388082355, + "compression_loss": 0.0, + "distillation_loss": 0.0803908184170723, + "epoch": 7.3, + "learning_rate": 4.9784902870681406e-05, + "loss": 0.0909, + "step": 7684, + "task_loss": 0.18548454344272614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999322139879224, + "compression_loss": 0.0, + "distillation_loss": 0.060036517679691315, + "epoch": 7.3, + "learning_rate": 4.97835058913149e-05, + "loss": 0.0655, + "step": 7685, + "task_loss": 0.11482767760753632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999324884248843, + "compression_loss": 0.0, + "distillation_loss": 0.027849406003952026, + "epoch": 7.3, + "learning_rate": 4.9782104409905186e-05, + "loss": 0.0506, + "step": 7686, + "task_loss": 0.2550583779811859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999327621201252, + "compression_loss": 0.0, + "distillation_loss": 0.05457767844200134, + "epoch": 7.3, + "learning_rate": 4.9780698426706864e-05, + "loss": 0.0507, + "step": 7687, + "task_loss": 0.016233546659350395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999330350746487, + "compression_loss": 0.0, + "distillation_loss": 0.09315378218889236, + "epoch": 7.3, + "learning_rate": 4.977928794197532e-05, + "loss": 0.0893, + "step": 7688, + "task_loss": 0.05484301596879959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999333072894586, + "compression_loss": 0.0, + "distillation_loss": 0.030269654467701912, + "epoch": 7.3, + "learning_rate": 4.9777872955966785e-05, + "loss": 0.0351, + "step": 7689, + "task_loss": 0.0781378373503685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999335787655584, + "compression_loss": 0.0, + "distillation_loss": 0.05370612442493439, + "epoch": 7.3, + "learning_rate": 4.97764534689383e-05, + "loss": 0.0585, + "step": 7690, + "task_loss": 0.101178377866745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999338495039519, + "compression_loss": 0.0, + "distillation_loss": 0.07610142230987549, + "epoch": 7.3, + "learning_rate": 4.977502948114772e-05, + "loss": 0.0748, + "step": 7691, + "task_loss": 0.06338108330965042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999341195056426, + "compression_loss": 0.0, + "distillation_loss": 0.1321769803762436, + "epoch": 7.3, + "learning_rate": 4.977360099285371e-05, + "loss": 0.1249, + "step": 7692, + "task_loss": 0.05932076275348663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999343887716345, + "compression_loss": 0.0, + "distillation_loss": 0.02152719907462597, + "epoch": 7.31, + "learning_rate": 4.9772168004315765e-05, + "loss": 0.0272, + "step": 7693, + "task_loss": 0.07799746096134186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799934657302931, + "compression_loss": 0.0, + "distillation_loss": 0.021207425743341446, + "epoch": 7.31, + "learning_rate": 4.9770730515794204e-05, + "loss": 0.0265, + "step": 7694, + "task_loss": 0.07455786317586899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999349251005359, + "compression_loss": 0.0, + "distillation_loss": 0.0122305266559124, + "epoch": 7.31, + "learning_rate": 4.976928852755015e-05, + "loss": 0.0193, + "step": 7695, + "task_loss": 0.08255228400230408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799935192165453, + "compression_loss": 0.0, + "distillation_loss": 0.016809673979878426, + "epoch": 7.31, + "learning_rate": 4.976784203984554e-05, + "loss": 0.016, + "step": 7696, + "task_loss": 0.008326346054673195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999354584986857, + "compression_loss": 0.0, + "distillation_loss": 0.04126199334859848, + "epoch": 7.31, + "learning_rate": 4.976639105294314e-05, + "loss": 0.0384, + "step": 7697, + "task_loss": 0.012333398684859276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999357241012379, + "compression_loss": 0.0, + "distillation_loss": 0.1654108762741089, + "epoch": 7.31, + "learning_rate": 4.976493556710653e-05, + "loss": 0.1673, + "step": 7698, + "task_loss": 0.18462178111076355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999359889741132, + "compression_loss": 0.0, + "distillation_loss": 0.022537576034665108, + "epoch": 7.31, + "learning_rate": 4.976347558260011e-05, + "loss": 0.0206, + "step": 7699, + "task_loss": 0.00269523449242115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999362531183153, + "compression_loss": 0.0, + "distillation_loss": 0.04379589483141899, + "epoch": 7.31, + "learning_rate": 4.976201109968908e-05, + "loss": 0.0441, + "step": 7700, + "task_loss": 0.04727374017238617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999365165348479, + "compression_loss": 0.0, + "distillation_loss": 0.06073600426316261, + "epoch": 7.31, + "learning_rate": 4.976054211863949e-05, + "loss": 0.0607, + "step": 7701, + "task_loss": 0.05995677784085274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999367792247146, + "compression_loss": 0.0, + "distillation_loss": 0.03459569066762924, + "epoch": 7.31, + "learning_rate": 4.9759068639718166e-05, + "loss": 0.0339, + "step": 7702, + "task_loss": 0.027450790628790855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999370411889193, + "compression_loss": 0.0, + "distillation_loss": 0.0753156989812851, + "epoch": 7.32, + "learning_rate": 4.975759066319278e-05, + "loss": 0.0687, + "step": 7703, + "task_loss": 0.00872378796339035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999373024284654, + "compression_loss": 0.0, + "distillation_loss": 0.029609953984618187, + "epoch": 7.32, + "learning_rate": 4.9756108189331825e-05, + "loss": 0.0271, + "step": 7704, + "task_loss": 0.004311494529247284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999375629443568, + "compression_loss": 0.0, + "distillation_loss": 0.0182811226695776, + "epoch": 7.32, + "learning_rate": 4.975462121840458e-05, + "loss": 0.0215, + "step": 7705, + "task_loss": 0.05089661478996277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999378227375971, + "compression_loss": 0.0, + "distillation_loss": 0.027917640283703804, + "epoch": 7.32, + "learning_rate": 4.975312975068118e-05, + "loss": 0.0381, + "step": 7706, + "task_loss": 0.1298721432685852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999380818091898, + "compression_loss": 0.0, + "distillation_loss": 0.06191258504986763, + "epoch": 7.32, + "learning_rate": 4.975163378643255e-05, + "loss": 0.058, + "step": 7707, + "task_loss": 0.02266554906964302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999383401601389, + "compression_loss": 0.0, + "distillation_loss": 0.02043573185801506, + "epoch": 7.32, + "learning_rate": 4.975013332593044e-05, + "loss": 0.021, + "step": 7708, + "task_loss": 0.026547657325863838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999385977914479, + "compression_loss": 0.0, + "distillation_loss": 0.018625617027282715, + "epoch": 7.32, + "learning_rate": 4.97486283694474e-05, + "loss": 0.0172, + "step": 7709, + "task_loss": 0.004806183278560638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999388547041205, + "compression_loss": 0.0, + "distillation_loss": 0.11342775821685791, + "epoch": 7.32, + "learning_rate": 4.974711891725684e-05, + "loss": 0.1113, + "step": 7710, + "task_loss": 0.09240047633647919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999391108991605, + "compression_loss": 0.0, + "distillation_loss": 0.025932233780622482, + "epoch": 7.32, + "learning_rate": 4.9745604969632934e-05, + "loss": 0.0389, + "step": 7711, + "task_loss": 0.15590538084506989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999393663775713, + "compression_loss": 0.0, + "distillation_loss": 0.15824320912361145, + "epoch": 7.32, + "learning_rate": 4.974408652685072e-05, + "loss": 0.1603, + "step": 7712, + "task_loss": 0.1786690652370453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799939621140357, + "compression_loss": 0.0, + "distillation_loss": 0.1327803134918213, + "epoch": 7.32, + "learning_rate": 4.974256358918601e-05, + "loss": 0.1301, + "step": 7713, + "task_loss": 0.1060781478881836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999398751885209, + "compression_loss": 0.0, + "distillation_loss": 0.024752333760261536, + "epoch": 7.33, + "learning_rate": 4.9741036156915464e-05, + "loss": 0.0296, + "step": 7714, + "task_loss": 0.07322291284799576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799940128523067, + "compression_loss": 0.0, + "distillation_loss": 0.04586133733391762, + "epoch": 7.33, + "learning_rate": 4.973950423031655e-05, + "loss": 0.0429, + "step": 7715, + "task_loss": 0.016736658290028572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999403811449985, + "compression_loss": 0.0, + "distillation_loss": 0.014082120731472969, + "epoch": 7.33, + "learning_rate": 4.9737967809667546e-05, + "loss": 0.0198, + "step": 7716, + "task_loss": 0.07149508595466614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999406330553197, + "compression_loss": 0.0, + "distillation_loss": 0.029716944321990013, + "epoch": 7.33, + "learning_rate": 4.9736426895247545e-05, + "loss": 0.0355, + "step": 7717, + "task_loss": 0.08792907744646072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999408842550338, + "compression_loss": 0.0, + "distillation_loss": 0.012465763837099075, + "epoch": 7.33, + "learning_rate": 4.973488148733647e-05, + "loss": 0.0118, + "step": 7718, + "task_loss": 0.005948711186647415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999411347451447, + "compression_loss": 0.0, + "distillation_loss": 0.028147976845502853, + "epoch": 7.33, + "learning_rate": 4.973333158621505e-05, + "loss": 0.0256, + "step": 7719, + "task_loss": 0.0030502378940582275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999413845266561, + "compression_loss": 0.0, + "distillation_loss": 0.26714271306991577, + "epoch": 7.33, + "learning_rate": 4.973177719216483e-05, + "loss": 0.2669, + "step": 7720, + "task_loss": 0.26459577679634094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999416336005716, + "compression_loss": 0.0, + "distillation_loss": 0.0716482624411583, + "epoch": 7.33, + "learning_rate": 4.973021830546817e-05, + "loss": 0.0705, + "step": 7721, + "task_loss": 0.06003544479608536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999418819678948, + "compression_loss": 0.0, + "distillation_loss": 0.03609579801559448, + "epoch": 7.33, + "learning_rate": 4.972865492640826e-05, + "loss": 0.0456, + "step": 7722, + "task_loss": 0.13090340793132782 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999421296296296, + "compression_loss": 0.0, + "distillation_loss": 0.09618473052978516, + "epoch": 7.33, + "learning_rate": 4.972708705526908e-05, + "loss": 0.1099, + "step": 7723, + "task_loss": 0.23324905335903168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999423765867796, + "compression_loss": 0.0, + "distillation_loss": 0.04280245676636696, + "epoch": 7.34, + "learning_rate": 4.972551469233545e-05, + "loss": 0.0415, + "step": 7724, + "task_loss": 0.029460199177265167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999426228403484, + "compression_loss": 0.0, + "distillation_loss": 0.2212740182876587, + "epoch": 7.34, + "learning_rate": 4.9723937837892996e-05, + "loss": 0.2304, + "step": 7725, + "task_loss": 0.3125489354133606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999428683913398, + "compression_loss": 0.0, + "distillation_loss": 0.04743258282542229, + "epoch": 7.34, + "learning_rate": 4.972235649222817e-05, + "loss": 0.0538, + "step": 7726, + "task_loss": 0.11064188182353973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999431132407573, + "compression_loss": 0.0, + "distillation_loss": 0.04925276339054108, + "epoch": 7.34, + "learning_rate": 4.972077065562821e-05, + "loss": 0.0582, + "step": 7727, + "task_loss": 0.13823464512825012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999433573896048, + "compression_loss": 0.0, + "distillation_loss": 0.05824163183569908, + "epoch": 7.34, + "learning_rate": 4.971918032838122e-05, + "loss": 0.0562, + "step": 7728, + "task_loss": 0.03779482841491699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999436008388858, + "compression_loss": 0.0, + "distillation_loss": 0.08110888302326202, + "epoch": 7.34, + "learning_rate": 4.9717585510776065e-05, + "loss": 0.08, + "step": 7729, + "task_loss": 0.06993526965379715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999438435896041, + "compression_loss": 0.0, + "distillation_loss": 0.026565806940197945, + "epoch": 7.34, + "learning_rate": 4.971598620310246e-05, + "loss": 0.0247, + "step": 7730, + "task_loss": 0.007519997656345367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999440856427634, + "compression_loss": 0.0, + "distillation_loss": 0.03230992704629898, + "epoch": 7.34, + "learning_rate": 4.9714382405650926e-05, + "loss": 0.0364, + "step": 7731, + "task_loss": 0.07325948029756546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999443269993672, + "compression_loss": 0.0, + "distillation_loss": 0.020386017858982086, + "epoch": 7.34, + "learning_rate": 4.971277411871281e-05, + "loss": 0.0193, + "step": 7732, + "task_loss": 0.009038899093866348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999445676604194, + "compression_loss": 0.0, + "distillation_loss": 0.07571500539779663, + "epoch": 7.34, + "learning_rate": 4.971116134258025e-05, + "loss": 0.0778, + "step": 7733, + "task_loss": 0.09688396751880646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999448076269235, + "compression_loss": 0.0, + "distillation_loss": 0.051084429025650024, + "epoch": 7.34, + "learning_rate": 4.9709544077546235e-05, + "loss": 0.0616, + "step": 7734, + "task_loss": 0.1558133214712143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999450468998834, + "compression_loss": 0.0, + "distillation_loss": 0.05314343795180321, + "epoch": 7.35, + "learning_rate": 4.9707922323904524e-05, + "loss": 0.0731, + "step": 7735, + "task_loss": 0.25305381417274475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999452854803026, + "compression_loss": 0.0, + "distillation_loss": 0.02783917263150215, + "epoch": 7.35, + "learning_rate": 4.9706296081949724e-05, + "loss": 0.0271, + "step": 7736, + "task_loss": 0.020391501486301422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999455233691849, + "compression_loss": 0.0, + "distillation_loss": 0.13770464062690735, + "epoch": 7.35, + "learning_rate": 4.9704665351977266e-05, + "loss": 0.1352, + "step": 7737, + "task_loss": 0.11255469173192978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999457605675339, + "compression_loss": 0.0, + "distillation_loss": 0.06904464215040207, + "epoch": 7.35, + "learning_rate": 4.9703030134283356e-05, + "loss": 0.063, + "step": 7738, + "task_loss": 0.008208906278014183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999459970763533, + "compression_loss": 0.0, + "distillation_loss": 0.14025121927261353, + "epoch": 7.35, + "learning_rate": 4.970139042916506e-05, + "loss": 0.1517, + "step": 7739, + "task_loss": 0.25468742847442627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999462328966467, + "compression_loss": 0.0, + "distillation_loss": 0.014299793168902397, + "epoch": 7.35, + "learning_rate": 4.969974623692023e-05, + "loss": 0.0224, + "step": 7740, + "task_loss": 0.09552035480737686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999464680294179, + "compression_loss": 0.0, + "distillation_loss": 0.050735875964164734, + "epoch": 7.35, + "learning_rate": 4.969809755784753e-05, + "loss": 0.046, + "step": 7741, + "task_loss": 0.003554748371243477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999467024756706, + "compression_loss": 0.0, + "distillation_loss": 0.11840305477380753, + "epoch": 7.35, + "learning_rate": 4.969644439224647e-05, + "loss": 0.1174, + "step": 7742, + "task_loss": 0.10836636275053024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999469362364083, + "compression_loss": 0.0, + "distillation_loss": 0.03614204376935959, + "epoch": 7.35, + "learning_rate": 4.969478674041735e-05, + "loss": 0.0386, + "step": 7743, + "task_loss": 0.061211228370666504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799947169312635, + "compression_loss": 0.0, + "distillation_loss": 0.054497864097356796, + "epoch": 7.35, + "learning_rate": 4.969312460266128e-05, + "loss": 0.0529, + "step": 7744, + "task_loss": 0.0389748215675354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799947401705354, + "compression_loss": 0.0, + "distillation_loss": 0.0671176165342331, + "epoch": 7.36, + "learning_rate": 4.969145797928021e-05, + "loss": 0.0707, + "step": 7745, + "task_loss": 0.10274288058280945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999476334155693, + "compression_loss": 0.0, + "distillation_loss": 0.03849276900291443, + "epoch": 7.36, + "learning_rate": 4.968978687057687e-05, + "loss": 0.0368, + "step": 7746, + "task_loss": 0.02152402326464653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999478644442845, + "compression_loss": 0.0, + "distillation_loss": 0.0317964106798172, + "epoch": 7.36, + "learning_rate": 4.9688111276854846e-05, + "loss": 0.036, + "step": 7747, + "task_loss": 0.0738469660282135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999480947925032, + "compression_loss": 0.0, + "distillation_loss": 0.10060693323612213, + "epoch": 7.36, + "learning_rate": 4.9686431198418515e-05, + "loss": 0.0981, + "step": 7748, + "task_loss": 0.07562766224145889 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799948324461229, + "compression_loss": 0.0, + "distillation_loss": 0.17574167251586914, + "epoch": 7.36, + "learning_rate": 4.968474663557306e-05, + "loss": 0.1741, + "step": 7749, + "task_loss": 0.1597302258014679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999485534514659, + "compression_loss": 0.0, + "distillation_loss": 0.05716216191649437, + "epoch": 7.36, + "learning_rate": 4.9683057588624494e-05, + "loss": 0.0604, + "step": 7750, + "task_loss": 0.08970184624195099 + }, + { + "epoch": 7.36, + "eval_accuracy": 0.8761467889908257, + "eval_loss": 0.5915129780769348, + "eval_runtime": 18.0311, + "eval_samples_per_second": 48.361, + "eval_steps_per_second": 6.045, + "step": 7750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999487817642172, + "compression_loss": 0.0, + "distillation_loss": 0.0786730945110321, + "epoch": 7.36, + "learning_rate": 4.968136405787964e-05, + "loss": 0.0764, + "step": 7751, + "task_loss": 0.05567537993192673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999490094004869, + "compression_loss": 0.0, + "distillation_loss": 0.12053617835044861, + "epoch": 7.36, + "learning_rate": 4.967966604364614e-05, + "loss": 0.12, + "step": 7752, + "task_loss": 0.11506214737892151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999492363612786, + "compression_loss": 0.0, + "distillation_loss": 0.13453137874603271, + "epoch": 7.36, + "learning_rate": 4.9677963546232445e-05, + "loss": 0.1363, + "step": 7753, + "task_loss": 0.152423694729805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999494626475958, + "compression_loss": 0.0, + "distillation_loss": 0.05290111154317856, + "epoch": 7.36, + "learning_rate": 4.967625656594782e-05, + "loss": 0.0494, + "step": 7754, + "task_loss": 0.018385985866189003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999496882604423, + "compression_loss": 0.0, + "distillation_loss": 0.11135755479335785, + "epoch": 7.36, + "learning_rate": 4.967454510310235e-05, + "loss": 0.1068, + "step": 7755, + "task_loss": 0.06577824056148529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999499132008219, + "compression_loss": 0.0, + "distillation_loss": 0.03572801128029823, + "epoch": 7.37, + "learning_rate": 4.967282915800693e-05, + "loss": 0.0332, + "step": 7756, + "task_loss": 0.01009390503168106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999501374697381, + "compression_loss": 0.0, + "distillation_loss": 0.13653673231601715, + "epoch": 7.37, + "learning_rate": 4.9671108730973274e-05, + "loss": 0.1311, + "step": 7757, + "task_loss": 0.08257921040058136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999503610681947, + "compression_loss": 0.0, + "distillation_loss": 0.01905696839094162, + "epoch": 7.37, + "learning_rate": 4.9669383822313886e-05, + "loss": 0.0178, + "step": 7758, + "task_loss": 0.006181072443723679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999505839971953, + "compression_loss": 0.0, + "distillation_loss": 0.051291391253471375, + "epoch": 7.37, + "learning_rate": 4.966765443234212e-05, + "loss": 0.0508, + "step": 7759, + "task_loss": 0.04657955840229988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999508062577437, + "compression_loss": 0.0, + "distillation_loss": 0.025289995595812798, + "epoch": 7.37, + "learning_rate": 4.966592056137213e-05, + "loss": 0.0322, + "step": 7760, + "task_loss": 0.09458184987306595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999510278508435, + "compression_loss": 0.0, + "distillation_loss": 0.022838320583105087, + "epoch": 7.37, + "learning_rate": 4.966418220971888e-05, + "loss": 0.031, + "step": 7761, + "task_loss": 0.10419056564569473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999512487774983, + "compression_loss": 0.0, + "distillation_loss": 0.015206392854452133, + "epoch": 7.37, + "learning_rate": 4.9662439377698145e-05, + "loss": 0.0172, + "step": 7762, + "task_loss": 0.03513802960515022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999514690387121, + "compression_loss": 0.0, + "distillation_loss": 0.07149576395750046, + "epoch": 7.37, + "learning_rate": 4.9660692065626515e-05, + "loss": 0.0657, + "step": 7763, + "task_loss": 0.013111604377627373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999516886354882, + "compression_loss": 0.0, + "distillation_loss": 0.0190842617303133, + "epoch": 7.37, + "learning_rate": 4.965894027382141e-05, + "loss": 0.0176, + "step": 7764, + "task_loss": 0.00447353720664978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999519075688304, + "compression_loss": 0.0, + "distillation_loss": 0.027284346520900726, + "epoch": 7.37, + "learning_rate": 4.965718400260105e-05, + "loss": 0.0253, + "step": 7765, + "task_loss": 0.007609104737639427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999521258397424, + "compression_loss": 0.0, + "distillation_loss": 0.05418026074767113, + "epoch": 7.38, + "learning_rate": 4.965542325228446e-05, + "loss": 0.0654, + "step": 7766, + "task_loss": 0.16616730391979218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999523434492279, + "compression_loss": 0.0, + "distillation_loss": 0.09797915816307068, + "epoch": 7.38, + "learning_rate": 4.96536580231915e-05, + "loss": 0.106, + "step": 7767, + "task_loss": 0.17831069231033325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999525603982907, + "compression_loss": 0.0, + "distillation_loss": 0.09306447952985764, + "epoch": 7.38, + "learning_rate": 4.9651888315642815e-05, + "loss": 0.0886, + "step": 7768, + "task_loss": 0.04870466887950897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999527766879343, + "compression_loss": 0.0, + "distillation_loss": 0.04752252995967865, + "epoch": 7.38, + "learning_rate": 4.96501141299599e-05, + "loss": 0.0523, + "step": 7769, + "task_loss": 0.09561814367771149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999529923191625, + "compression_loss": 0.0, + "distillation_loss": 0.05140899494290352, + "epoch": 7.38, + "learning_rate": 4.9648335466465035e-05, + "loss": 0.0529, + "step": 7770, + "task_loss": 0.06599454581737518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999532072929788, + "compression_loss": 0.0, + "distillation_loss": 0.049043748527765274, + "epoch": 7.38, + "learning_rate": 4.964655232548133e-05, + "loss": 0.0707, + "step": 7771, + "task_loss": 0.2652056813240051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999534216103871, + "compression_loss": 0.0, + "distillation_loss": 0.08960321545600891, + "epoch": 7.38, + "learning_rate": 4.964476470733269e-05, + "loss": 0.093, + "step": 7772, + "task_loss": 0.12396573275327682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799953635272391, + "compression_loss": 0.0, + "distillation_loss": 0.028217440471053123, + "epoch": 7.38, + "learning_rate": 4.964297261234385e-05, + "loss": 0.0295, + "step": 7773, + "task_loss": 0.0411832258105278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999538482799942, + "compression_loss": 0.0, + "distillation_loss": 0.03329095244407654, + "epoch": 7.38, + "learning_rate": 4.964117604084036e-05, + "loss": 0.0398, + "step": 7774, + "task_loss": 0.09877783805131912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999540606342003, + "compression_loss": 0.0, + "distillation_loss": 0.09195288270711899, + "epoch": 7.38, + "learning_rate": 4.963937499314857e-05, + "loss": 0.1022, + "step": 7775, + "task_loss": 0.1942749172449112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799954272336013, + "compression_loss": 0.0, + "distillation_loss": 0.036124154925346375, + "epoch": 7.38, + "learning_rate": 4.963756946959564e-05, + "loss": 0.0386, + "step": 7776, + "task_loss": 0.060537442564964294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799954483386436, + "compression_loss": 0.0, + "distillation_loss": 0.04166106879711151, + "epoch": 7.39, + "learning_rate": 4.9635759470509554e-05, + "loss": 0.0392, + "step": 7777, + "task_loss": 0.017511041834950447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999546937864732, + "compression_loss": 0.0, + "distillation_loss": 0.03311231732368469, + "epoch": 7.39, + "learning_rate": 4.9633944996219125e-05, + "loss": 0.0324, + "step": 7778, + "task_loss": 0.025931518524885178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799954903537128, + "compression_loss": 0.0, + "distillation_loss": 0.11695058643817902, + "epoch": 7.39, + "learning_rate": 4.9632126047053954e-05, + "loss": 0.1216, + "step": 7779, + "task_loss": 0.1629902720451355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999551126394041, + "compression_loss": 0.0, + "distillation_loss": 0.12245595455169678, + "epoch": 7.39, + "learning_rate": 4.963030262334445e-05, + "loss": 0.1158, + "step": 7780, + "task_loss": 0.05568038299679756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999553210943052, + "compression_loss": 0.0, + "distillation_loss": 0.027472712099552155, + "epoch": 7.39, + "learning_rate": 4.962847472542185e-05, + "loss": 0.031, + "step": 7781, + "task_loss": 0.062434788793325424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999555289028352, + "compression_loss": 0.0, + "distillation_loss": 0.04194699972867966, + "epoch": 7.39, + "learning_rate": 4.96266423536182e-05, + "loss": 0.0484, + "step": 7782, + "task_loss": 0.10626760870218277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999557360659975, + "compression_loss": 0.0, + "distillation_loss": 0.05600655823945999, + "epoch": 7.39, + "learning_rate": 4.9624805508266375e-05, + "loss": 0.0603, + "step": 7783, + "task_loss": 0.09906087070703506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799955942584796, + "compression_loss": 0.0, + "distillation_loss": 0.03841715306043625, + "epoch": 7.39, + "learning_rate": 4.9622964189700026e-05, + "loss": 0.0373, + "step": 7784, + "task_loss": 0.027386927977204323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999561484602341, + "compression_loss": 0.0, + "distillation_loss": 0.033997707068920135, + "epoch": 7.39, + "learning_rate": 4.962111839825365e-05, + "loss": 0.0323, + "step": 7785, + "task_loss": 0.016779828816652298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999563536933157, + "compression_loss": 0.0, + "distillation_loss": 0.05404585599899292, + "epoch": 7.39, + "learning_rate": 4.961926813426254e-05, + "loss": 0.0535, + "step": 7786, + "task_loss": 0.04876035451889038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999565582850446, + "compression_loss": 0.0, + "distillation_loss": 0.03144819289445877, + "epoch": 7.4, + "learning_rate": 4.9617413398062814e-05, + "loss": 0.0358, + "step": 7787, + "task_loss": 0.07468868046998978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999567622364242, + "compression_loss": 0.0, + "distillation_loss": 0.09421627223491669, + "epoch": 7.4, + "learning_rate": 4.9615554189991374e-05, + "loss": 0.0962, + "step": 7788, + "task_loss": 0.11395429074764252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999569655484584, + "compression_loss": 0.0, + "distillation_loss": 0.05254974216222763, + "epoch": 7.4, + "learning_rate": 4.9613690510385965e-05, + "loss": 0.0524, + "step": 7789, + "task_loss": 0.051460277289152145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999571682221507, + "compression_loss": 0.0, + "distillation_loss": 0.1056189015507698, + "epoch": 7.4, + "learning_rate": 4.961182235958515e-05, + "loss": 0.1065, + "step": 7790, + "task_loss": 0.11400679498910904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999573702585049, + "compression_loss": 0.0, + "distillation_loss": 0.04173080623149872, + "epoch": 7.4, + "learning_rate": 4.9609949737928254e-05, + "loss": 0.0572, + "step": 7791, + "task_loss": 0.19677627086639404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999575716585247, + "compression_loss": 0.0, + "distillation_loss": 0.021544422954320908, + "epoch": 7.4, + "learning_rate": 4.9608072645755476e-05, + "loss": 0.0198, + "step": 7792, + "task_loss": 0.004072500392794609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999577724232136, + "compression_loss": 0.0, + "distillation_loss": 0.06151081249117851, + "epoch": 7.4, + "learning_rate": 4.960619108340778e-05, + "loss": 0.058, + "step": 7793, + "task_loss": 0.02649463526904583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999579725535755, + "compression_loss": 0.0, + "distillation_loss": 0.07832042872905731, + "epoch": 7.4, + "learning_rate": 4.9604305051226976e-05, + "loss": 0.0742, + "step": 7794, + "task_loss": 0.03758959844708443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799958172050614, + "compression_loss": 0.0, + "distillation_loss": 0.058107852935791016, + "epoch": 7.4, + "learning_rate": 4.960241454955566e-05, + "loss": 0.0527, + "step": 7795, + "task_loss": 0.004193047061562538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999583709153328, + "compression_loss": 0.0, + "distillation_loss": 0.025543441995978355, + "epoch": 7.4, + "learning_rate": 4.960051957873725e-05, + "loss": 0.0301, + "step": 7796, + "task_loss": 0.07116261124610901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999585691487355, + "compression_loss": 0.0, + "distillation_loss": 0.09677424281835556, + "epoch": 7.4, + "learning_rate": 4.959862013911599e-05, + "loss": 0.0951, + "step": 7797, + "task_loss": 0.07997578382492065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799958766751826, + "compression_loss": 0.0, + "distillation_loss": 0.03689539432525635, + "epoch": 7.41, + "learning_rate": 4.959671623103691e-05, + "loss": 0.0423, + "step": 7798, + "task_loss": 0.09053385257720947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999589637256077, + "compression_loss": 0.0, + "distillation_loss": 0.09475536644458771, + "epoch": 7.41, + "learning_rate": 4.959480785484587e-05, + "loss": 0.1033, + "step": 7799, + "task_loss": 0.17996615171432495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999591600710844, + "compression_loss": 0.0, + "distillation_loss": 0.10527956485748291, + "epoch": 7.41, + "learning_rate": 4.959289501088953e-05, + "loss": 0.0973, + "step": 7800, + "task_loss": 0.025774430483579636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999593557892598, + "compression_loss": 0.0, + "distillation_loss": 0.04095172882080078, + "epoch": 7.41, + "learning_rate": 4.9590977699515374e-05, + "loss": 0.0552, + "step": 7801, + "task_loss": 0.18330860137939453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999595508811376, + "compression_loss": 0.0, + "distillation_loss": 0.03950580954551697, + "epoch": 7.41, + "learning_rate": 4.958905592107168e-05, + "loss": 0.0592, + "step": 7802, + "task_loss": 0.2363201081752777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999597453477214, + "compression_loss": 0.0, + "distillation_loss": 0.1426384150981903, + "epoch": 7.41, + "learning_rate": 4.958712967590756e-05, + "loss": 0.1449, + "step": 7803, + "task_loss": 0.16487504541873932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999599391900151, + "compression_loss": 0.0, + "distillation_loss": 0.03035861626267433, + "epoch": 7.41, + "learning_rate": 4.9585198964372925e-05, + "loss": 0.0333, + "step": 7804, + "task_loss": 0.060261476784944534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999601324090221, + "compression_loss": 0.0, + "distillation_loss": 0.0202895849943161, + "epoch": 7.41, + "learning_rate": 4.958326378681849e-05, + "loss": 0.0284, + "step": 7805, + "task_loss": 0.10182420909404755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999603250057462, + "compression_loss": 0.0, + "distillation_loss": 0.04021252319216728, + "epoch": 7.41, + "learning_rate": 4.958132414359579e-05, + "loss": 0.0507, + "step": 7806, + "task_loss": 0.14528432488441467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999605169811911, + "compression_loss": 0.0, + "distillation_loss": 0.14047583937644958, + "epoch": 7.41, + "learning_rate": 4.957938003505718e-05, + "loss": 0.1381, + "step": 7807, + "task_loss": 0.1167716383934021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999607083363605, + "compression_loss": 0.0, + "distillation_loss": 0.06726957857608795, + "epoch": 7.42, + "learning_rate": 4.957743146155581e-05, + "loss": 0.0725, + "step": 7808, + "task_loss": 0.11913690716028214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799960899072258, + "compression_loss": 0.0, + "distillation_loss": 0.02605987712740898, + "epoch": 7.42, + "learning_rate": 4.9575478423445655e-05, + "loss": 0.024, + "step": 7809, + "task_loss": 0.005071789026260376 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999610891898873, + "compression_loss": 0.0, + "distillation_loss": 0.04089764878153801, + "epoch": 7.42, + "learning_rate": 4.957352092108148e-05, + "loss": 0.0429, + "step": 7810, + "task_loss": 0.06113029643893242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999612786902521, + "compression_loss": 0.0, + "distillation_loss": 0.014122812077403069, + "epoch": 7.42, + "learning_rate": 4.957155895481889e-05, + "loss": 0.0132, + "step": 7811, + "task_loss": 0.005167461931705475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999614675743562, + "compression_loss": 0.0, + "distillation_loss": 0.013877642340958118, + "epoch": 7.42, + "learning_rate": 4.956959252501426e-05, + "loss": 0.0172, + "step": 7812, + "task_loss": 0.047237932682037354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999616558432031, + "compression_loss": 0.0, + "distillation_loss": 0.04274125397205353, + "epoch": 7.42, + "learning_rate": 4.956762163202484e-05, + "loss": 0.0458, + "step": 7813, + "task_loss": 0.07380035519599915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999618434977966, + "compression_loss": 0.0, + "distillation_loss": 0.18687260150909424, + "epoch": 7.42, + "learning_rate": 4.956564627620862e-05, + "loss": 0.1937, + "step": 7814, + "task_loss": 0.25557661056518555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999620305391403, + "compression_loss": 0.0, + "distillation_loss": 0.136896014213562, + "epoch": 7.42, + "learning_rate": 4.956366645792445e-05, + "loss": 0.1269, + "step": 7815, + "task_loss": 0.03715690225362778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799962216968238, + "compression_loss": 0.0, + "distillation_loss": 0.011462707072496414, + "epoch": 7.42, + "learning_rate": 4.956168217753197e-05, + "loss": 0.0107, + "step": 7816, + "task_loss": 0.004216820001602173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999624027860932, + "compression_loss": 0.0, + "distillation_loss": 0.030813174322247505, + "epoch": 7.42, + "learning_rate": 4.955969343539162e-05, + "loss": 0.0384, + "step": 7817, + "task_loss": 0.10682034492492676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999625879937098, + "compression_loss": 0.0, + "distillation_loss": 0.0257425494492054, + "epoch": 7.42, + "learning_rate": 4.955770023186469e-05, + "loss": 0.024, + "step": 7818, + "task_loss": 0.007959723472595215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999627725920913, + "compression_loss": 0.0, + "distillation_loss": 0.10705772042274475, + "epoch": 7.43, + "learning_rate": 4.9555702567313235e-05, + "loss": 0.1132, + "step": 7819, + "task_loss": 0.1683865487575531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999629565822415, + "compression_loss": 0.0, + "distillation_loss": 0.07967795431613922, + "epoch": 7.43, + "learning_rate": 4.9553700442100146e-05, + "loss": 0.0802, + "step": 7820, + "task_loss": 0.08508510887622833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799963139965164, + "compression_loss": 0.0, + "distillation_loss": 0.14191925525665283, + "epoch": 7.43, + "learning_rate": 4.955169385658912e-05, + "loss": 0.1483, + "step": 7821, + "task_loss": 0.20544429123401642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999633227418624, + "compression_loss": 0.0, + "distillation_loss": 0.025436338037252426, + "epoch": 7.43, + "learning_rate": 4.954968281114467e-05, + "loss": 0.0262, + "step": 7822, + "task_loss": 0.03303007036447525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999635049133407, + "compression_loss": 0.0, + "distillation_loss": 0.025042004883289337, + "epoch": 7.43, + "learning_rate": 4.9547667306132096e-05, + "loss": 0.0274, + "step": 7823, + "task_loss": 0.048622217029333115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999636864806022, + "compression_loss": 0.0, + "distillation_loss": 0.04454713687300682, + "epoch": 7.43, + "learning_rate": 4.954564734191753e-05, + "loss": 0.0541, + "step": 7824, + "task_loss": 0.13989417254924774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799963867444651, + "compression_loss": 0.0, + "distillation_loss": 0.037809595465660095, + "epoch": 7.43, + "learning_rate": 4.9543622918867926e-05, + "loss": 0.0451, + "step": 7825, + "task_loss": 0.11072047799825668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999640478064903, + "compression_loss": 0.0, + "distillation_loss": 0.10665163397789001, + "epoch": 7.43, + "learning_rate": 4.9541594037351e-05, + "loss": 0.1153, + "step": 7826, + "task_loss": 0.19337314367294312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999642275671242, + "compression_loss": 0.0, + "distillation_loss": 0.07981669902801514, + "epoch": 7.43, + "learning_rate": 4.953956069773534e-05, + "loss": 0.0826, + "step": 7827, + "task_loss": 0.10715599358081818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999644067275561, + "compression_loss": 0.0, + "distillation_loss": 0.026072172448039055, + "epoch": 7.43, + "learning_rate": 4.953752290039028e-05, + "loss": 0.0309, + "step": 7828, + "task_loss": 0.07401913404464722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999645852887898, + "compression_loss": 0.0, + "distillation_loss": 0.05162549391388893, + "epoch": 7.43, + "learning_rate": 4.953548064568602e-05, + "loss": 0.0483, + "step": 7829, + "task_loss": 0.018601059913635254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799964763251829, + "compression_loss": 0.0, + "distillation_loss": 0.03277206048369408, + "epoch": 7.44, + "learning_rate": 4.953343393399354e-05, + "loss": 0.0379, + "step": 7830, + "task_loss": 0.08411306887865067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999649406176773, + "compression_loss": 0.0, + "distillation_loss": 0.023461688309907913, + "epoch": 7.44, + "learning_rate": 4.953138276568462e-05, + "loss": 0.0346, + "step": 7831, + "task_loss": 0.13488860428333282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999651173873386, + "compression_loss": 0.0, + "distillation_loss": 0.20481958985328674, + "epoch": 7.44, + "learning_rate": 4.952932714113188e-05, + "loss": 0.2078, + "step": 7832, + "task_loss": 0.2342824935913086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999652935618162, + "compression_loss": 0.0, + "distillation_loss": 0.05175776779651642, + "epoch": 7.44, + "learning_rate": 4.9527267060708734e-05, + "loss": 0.0608, + "step": 7833, + "task_loss": 0.14181137084960938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999654691421142, + "compression_loss": 0.0, + "distillation_loss": 0.1361636519432068, + "epoch": 7.44, + "learning_rate": 4.9525202524789397e-05, + "loss": 0.1285, + "step": 7834, + "task_loss": 0.059967104345560074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799965644129236, + "compression_loss": 0.0, + "distillation_loss": 0.03751971200108528, + "epoch": 7.44, + "learning_rate": 4.952313353374891e-05, + "loss": 0.0376, + "step": 7835, + "task_loss": 0.03848748654127121 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999658185241852, + "compression_loss": 0.0, + "distillation_loss": 0.03644530102610588, + "epoch": 7.44, + "learning_rate": 4.952106008796311e-05, + "loss": 0.0375, + "step": 7836, + "task_loss": 0.04672703891992569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799965992327966, + "compression_loss": 0.0, + "distillation_loss": 0.03545859456062317, + "epoch": 7.44, + "learning_rate": 4.9518982187808653e-05, + "loss": 0.036, + "step": 7837, + "task_loss": 0.040626343339681625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999661655415814, + "compression_loss": 0.0, + "distillation_loss": 0.04039856046438217, + "epoch": 7.44, + "learning_rate": 4.9516899833663e-05, + "loss": 0.0369, + "step": 7838, + "task_loss": 0.005691641941666603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999663381660356, + "compression_loss": 0.0, + "distillation_loss": 0.019872594624757767, + "epoch": 7.44, + "learning_rate": 4.9514813025904413e-05, + "loss": 0.0265, + "step": 7839, + "task_loss": 0.0859319418668747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799966510202332, + "compression_loss": 0.0, + "distillation_loss": 0.024417951703071594, + "epoch": 7.45, + "learning_rate": 4.951272176491197e-05, + "loss": 0.0301, + "step": 7840, + "task_loss": 0.08107300847768784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999666816514744, + "compression_loss": 0.0, + "distillation_loss": 0.03704302757978439, + "epoch": 7.45, + "learning_rate": 4.951062605106557e-05, + "loss": 0.0485, + "step": 7841, + "task_loss": 0.15177100896835327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999668525144665, + "compression_loss": 0.0, + "distillation_loss": 0.04666809365153313, + "epoch": 7.45, + "learning_rate": 4.950852588474591e-05, + "loss": 0.0444, + "step": 7842, + "task_loss": 0.024120669811964035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999670227923119, + "compression_loss": 0.0, + "distillation_loss": 0.02970319800078869, + "epoch": 7.45, + "learning_rate": 4.9506421266334475e-05, + "loss": 0.0321, + "step": 7843, + "task_loss": 0.053807858377695084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999671924860143, + "compression_loss": 0.0, + "distillation_loss": 0.027329690754413605, + "epoch": 7.45, + "learning_rate": 4.9504312196213596e-05, + "loss": 0.0253, + "step": 7844, + "task_loss": 0.007107492536306381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999673615965774, + "compression_loss": 0.0, + "distillation_loss": 0.04351479932665825, + "epoch": 7.45, + "learning_rate": 4.95021986747664e-05, + "loss": 0.0472, + "step": 7845, + "task_loss": 0.08041299134492874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799967530125005, + "compression_loss": 0.0, + "distillation_loss": 0.07271379977464676, + "epoch": 7.45, + "learning_rate": 4.9500080702376805e-05, + "loss": 0.0735, + "step": 7846, + "task_loss": 0.08034893125295639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999676980723006, + "compression_loss": 0.0, + "distillation_loss": 0.07482333481311798, + "epoch": 7.45, + "learning_rate": 4.949795827942956e-05, + "loss": 0.087, + "step": 7847, + "task_loss": 0.19708333909511566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799967865439468, + "compression_loss": 0.0, + "distillation_loss": 0.1008092612028122, + "epoch": 7.45, + "learning_rate": 4.9495831406310205e-05, + "loss": 0.1136, + "step": 7848, + "task_loss": 0.22852961719036102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999680322275107, + "compression_loss": 0.0, + "distillation_loss": 0.05679786577820778, + "epoch": 7.45, + "learning_rate": 4.94937000834051e-05, + "loss": 0.0553, + "step": 7849, + "task_loss": 0.042113713920116425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999681984374326, + "compression_loss": 0.0, + "distillation_loss": 0.15729619562625885, + "epoch": 7.45, + "learning_rate": 4.9491564311101426e-05, + "loss": 0.1526, + "step": 7850, + "task_loss": 0.1107601523399353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999683640702373, + "compression_loss": 0.0, + "distillation_loss": 0.0956711545586586, + "epoch": 7.46, + "learning_rate": 4.9489424089787125e-05, + "loss": 0.0906, + "step": 7851, + "task_loss": 0.04521763324737549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999685291269284, + "compression_loss": 0.0, + "distillation_loss": 0.1287081092596054, + "epoch": 7.46, + "learning_rate": 4.948727941985101e-05, + "loss": 0.1281, + "step": 7852, + "task_loss": 0.1223960816860199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999686936085098, + "compression_loss": 0.0, + "distillation_loss": 0.02583775669336319, + "epoch": 7.46, + "learning_rate": 4.948513030168265e-05, + "loss": 0.0244, + "step": 7853, + "task_loss": 0.011274663731455803 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999688575159849, + "compression_loss": 0.0, + "distillation_loss": 0.016532419249415398, + "epoch": 7.46, + "learning_rate": 4.948297673567245e-05, + "loss": 0.0152, + "step": 7854, + "task_loss": 0.0036627184599637985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999690208503576, + "compression_loss": 0.0, + "distillation_loss": 0.11579355597496033, + "epoch": 7.46, + "learning_rate": 4.948081872221161e-05, + "loss": 0.1192, + "step": 7855, + "task_loss": 0.1501166969537735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999691836126315, + "compression_loss": 0.0, + "distillation_loss": 0.038752175867557526, + "epoch": 7.46, + "learning_rate": 4.9478656261692155e-05, + "loss": 0.0355, + "step": 7856, + "task_loss": 0.0065444111824035645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999693458038103, + "compression_loss": 0.0, + "distillation_loss": 0.027601994574069977, + "epoch": 7.46, + "learning_rate": 4.947648935450689e-05, + "loss": 0.0254, + "step": 7857, + "task_loss": 0.00563957542181015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999695074248976, + "compression_loss": 0.0, + "distillation_loss": 0.10853047668933868, + "epoch": 7.46, + "learning_rate": 4.947431800104947e-05, + "loss": 0.1053, + "step": 7858, + "task_loss": 0.07578997313976288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999696684768972, + "compression_loss": 0.0, + "distillation_loss": 0.05468723922967911, + "epoch": 7.46, + "learning_rate": 4.94721422017143e-05, + "loss": 0.0603, + "step": 7859, + "task_loss": 0.11080426722764969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999698289608128, + "compression_loss": 0.0, + "distillation_loss": 0.05597386136651039, + "epoch": 7.46, + "learning_rate": 4.946996195689665e-05, + "loss": 0.0563, + "step": 7860, + "task_loss": 0.0596843883395195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999699888776479, + "compression_loss": 0.0, + "distillation_loss": 0.0576602928340435, + "epoch": 7.47, + "learning_rate": 4.9467777266992555e-05, + "loss": 0.0558, + "step": 7861, + "task_loss": 0.038872990757226944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999701482284064, + "compression_loss": 0.0, + "distillation_loss": 0.032028764486312866, + "epoch": 7.47, + "learning_rate": 4.946558813239888e-05, + "loss": 0.0351, + "step": 7862, + "task_loss": 0.06291679292917252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999703070140919, + "compression_loss": 0.0, + "distillation_loss": 0.07386447489261627, + "epoch": 7.47, + "learning_rate": 4.94633945535133e-05, + "loss": 0.071, + "step": 7863, + "task_loss": 0.04504578933119774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999704652357079, + "compression_loss": 0.0, + "distillation_loss": 0.026677310466766357, + "epoch": 7.47, + "learning_rate": 4.946119653073428e-05, + "loss": 0.0249, + "step": 7864, + "task_loss": 0.009069759398698807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999706228942585, + "compression_loss": 0.0, + "distillation_loss": 0.025820819661021233, + "epoch": 7.47, + "learning_rate": 4.9458994064461103e-05, + "loss": 0.0246, + "step": 7865, + "task_loss": 0.013258153572678566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799970779990747, + "compression_loss": 0.0, + "distillation_loss": 0.15189041197299957, + "epoch": 7.47, + "learning_rate": 4.945678715509386e-05, + "loss": 0.1558, + "step": 7866, + "task_loss": 0.1908566802740097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999709365261771, + "compression_loss": 0.0, + "distillation_loss": 0.08925406634807587, + "epoch": 7.47, + "learning_rate": 4.9454575803033445e-05, + "loss": 0.0941, + "step": 7867, + "task_loss": 0.13728086650371552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999710925015526, + "compression_loss": 0.0, + "distillation_loss": 0.12610575556755066, + "epoch": 7.47, + "learning_rate": 4.945236000868156e-05, + "loss": 0.12, + "step": 7868, + "task_loss": 0.06535394489765167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999712479178773, + "compression_loss": 0.0, + "distillation_loss": 0.02531656250357628, + "epoch": 7.47, + "learning_rate": 4.9450139772440715e-05, + "loss": 0.0236, + "step": 7869, + "task_loss": 0.007954435423016548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999714027761547, + "compression_loss": 0.0, + "distillation_loss": 0.02278076484799385, + "epoch": 7.47, + "learning_rate": 4.944791509471423e-05, + "loss": 0.0209, + "step": 7870, + "task_loss": 0.0036298464983701706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999715570773887, + "compression_loss": 0.0, + "distillation_loss": 0.010984521359205246, + "epoch": 7.47, + "learning_rate": 4.944568597590622e-05, + "loss": 0.0103, + "step": 7871, + "task_loss": 0.003945378586649895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999717108225826, + "compression_loss": 0.0, + "distillation_loss": 0.011236527003347874, + "epoch": 7.48, + "learning_rate": 4.944345241642162e-05, + "loss": 0.0105, + "step": 7872, + "task_loss": 0.0039033014327287674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999718640127403, + "compression_loss": 0.0, + "distillation_loss": 0.11728902161121368, + "epoch": 7.48, + "learning_rate": 4.944121441666617e-05, + "loss": 0.1264, + "step": 7873, + "task_loss": 0.2082228660583496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999720166488656, + "compression_loss": 0.0, + "distillation_loss": 0.05443963408470154, + "epoch": 7.48, + "learning_rate": 4.943897197704642e-05, + "loss": 0.0619, + "step": 7874, + "task_loss": 0.12860585749149323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999721687319621, + "compression_loss": 0.0, + "distillation_loss": 0.10132479667663574, + "epoch": 7.48, + "learning_rate": 4.9436725097969696e-05, + "loss": 0.1139, + "step": 7875, + "task_loss": 0.22712013125419617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999723202630333, + "compression_loss": 0.0, + "distillation_loss": 0.020510438829660416, + "epoch": 7.48, + "learning_rate": 4.943447377984418e-05, + "loss": 0.0188, + "step": 7876, + "task_loss": 0.0038789715617895126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999724712430831, + "compression_loss": 0.0, + "distillation_loss": 0.17946818470954895, + "epoch": 7.48, + "learning_rate": 4.943221802307882e-05, + "loss": 0.1833, + "step": 7877, + "task_loss": 0.21781717240810394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999726216731151, + "compression_loss": 0.0, + "distillation_loss": 0.050077952444553375, + "epoch": 7.48, + "learning_rate": 4.942995782808339e-05, + "loss": 0.0547, + "step": 7878, + "task_loss": 0.09608137607574463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799972771554133, + "compression_loss": 0.0, + "distillation_loss": 0.03088299185037613, + "epoch": 7.48, + "learning_rate": 4.9427693195268466e-05, + "loss": 0.0283, + "step": 7879, + "task_loss": 0.005498785525560379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999729208871406, + "compression_loss": 0.0, + "distillation_loss": 0.0309526976197958, + "epoch": 7.48, + "learning_rate": 4.942542412504543e-05, + "loss": 0.0317, + "step": 7880, + "task_loss": 0.038051947951316833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999730696731413, + "compression_loss": 0.0, + "distillation_loss": 0.028046930208802223, + "epoch": 7.48, + "learning_rate": 4.942315061782646e-05, + "loss": 0.0358, + "step": 7881, + "task_loss": 0.10542275756597519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799973217913139, + "compression_loss": 0.0, + "distillation_loss": 0.026341721415519714, + "epoch": 7.49, + "learning_rate": 4.942087267402457e-05, + "loss": 0.0245, + "step": 7882, + "task_loss": 0.007860302925109863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999733656081373, + "compression_loss": 0.0, + "distillation_loss": 0.014681628905236721, + "epoch": 7.49, + "learning_rate": 4.941859029405353e-05, + "loss": 0.0138, + "step": 7883, + "task_loss": 0.0060966480523347855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79997351275914, + "compression_loss": 0.0, + "distillation_loss": 0.17591474950313568, + "epoch": 7.49, + "learning_rate": 4.9416303478327974e-05, + "loss": 0.1843, + "step": 7884, + "task_loss": 0.25975045561790466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999736593671505, + "compression_loss": 0.0, + "distillation_loss": 0.04054497182369232, + "epoch": 7.49, + "learning_rate": 4.9414012227263295e-05, + "loss": 0.0438, + "step": 7885, + "task_loss": 0.07342273741960526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999738054331729, + "compression_loss": 0.0, + "distillation_loss": 0.2347552627325058, + "epoch": 7.49, + "learning_rate": 4.941171654127572e-05, + "loss": 0.2415, + "step": 7886, + "task_loss": 0.301849901676178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999739509582104, + "compression_loss": 0.0, + "distillation_loss": 0.04243939742445946, + "epoch": 7.49, + "learning_rate": 4.9409416420782264e-05, + "loss": 0.0463, + "step": 7887, + "task_loss": 0.0810224711894989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999740959432671, + "compression_loss": 0.0, + "distillation_loss": 0.021390702575445175, + "epoch": 7.49, + "learning_rate": 4.940711186620076e-05, + "loss": 0.0198, + "step": 7888, + "task_loss": 0.0057023558765649796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999742403893465, + "compression_loss": 0.0, + "distillation_loss": 0.08921066671609879, + "epoch": 7.49, + "learning_rate": 4.9404802877949843e-05, + "loss": 0.1026, + "step": 7889, + "task_loss": 0.22274163365364075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999743842974523, + "compression_loss": 0.0, + "distillation_loss": 0.03275410085916519, + "epoch": 7.49, + "learning_rate": 4.940248945644894e-05, + "loss": 0.0315, + "step": 7890, + "task_loss": 0.02053932100534439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999745276685881, + "compression_loss": 0.0, + "distillation_loss": 0.11449338495731354, + "epoch": 7.49, + "learning_rate": 4.9400171602118306e-05, + "loss": 0.1164, + "step": 7891, + "task_loss": 0.13319021463394165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999746705037577, + "compression_loss": 0.0, + "distillation_loss": 0.03880294784903526, + "epoch": 7.49, + "learning_rate": 4.939784931537899e-05, + "loss": 0.0356, + "step": 7892, + "task_loss": 0.00670219212770462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999748128039648, + "compression_loss": 0.0, + "distillation_loss": 0.09690918028354645, + "epoch": 7.5, + "learning_rate": 4.9395522596652846e-05, + "loss": 0.0992, + "step": 7893, + "task_loss": 0.12014244496822357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999749545702131, + "compression_loss": 0.0, + "distillation_loss": 0.025341130793094635, + "epoch": 7.5, + "learning_rate": 4.939319144636253e-05, + "loss": 0.0233, + "step": 7894, + "task_loss": 0.005261138081550598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799975095803506, + "compression_loss": 0.0, + "distillation_loss": 0.0120600126683712, + "epoch": 7.5, + "learning_rate": 4.9390855864931504e-05, + "loss": 0.0113, + "step": 7895, + "task_loss": 0.004162052646279335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999752365048475, + "compression_loss": 0.0, + "distillation_loss": 0.1584700495004654, + "epoch": 7.5, + "learning_rate": 4.938851585278405e-05, + "loss": 0.1588, + "step": 7896, + "task_loss": 0.16162604093551636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999753766752412, + "compression_loss": 0.0, + "distillation_loss": 0.0646199956536293, + "epoch": 7.5, + "learning_rate": 4.938617141034523e-05, + "loss": 0.0662, + "step": 7897, + "task_loss": 0.08005733042955399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999755163156906, + "compression_loss": 0.0, + "distillation_loss": 0.08716416358947754, + "epoch": 7.5, + "learning_rate": 4.938382253804094e-05, + "loss": 0.0858, + "step": 7898, + "task_loss": 0.07349246740341187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999756554271997, + "compression_loss": 0.0, + "distillation_loss": 0.022657334804534912, + "epoch": 7.5, + "learning_rate": 4.938146923629784e-05, + "loss": 0.0267, + "step": 7899, + "task_loss": 0.06275828927755356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799975794010772, + "compression_loss": 0.0, + "distillation_loss": 0.01820671185851097, + "epoch": 7.5, + "learning_rate": 4.937911150554343e-05, + "loss": 0.0228, + "step": 7900, + "task_loss": 0.06378181278705597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999759320674112, + "compression_loss": 0.0, + "distillation_loss": 0.11242584884166718, + "epoch": 7.5, + "learning_rate": 4.9376749346206006e-05, + "loss": 0.1087, + "step": 7901, + "task_loss": 0.07475525140762329 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799976069598121, + "compression_loss": 0.0, + "distillation_loss": 0.048631347715854645, + "epoch": 7.5, + "learning_rate": 4.937438275871467e-05, + "loss": 0.058, + "step": 7902, + "task_loss": 0.1423451006412506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799976206603905, + "compression_loss": 0.0, + "distillation_loss": 0.035419534891843796, + "epoch": 7.51, + "learning_rate": 4.9372011743499315e-05, + "loss": 0.0361, + "step": 7903, + "task_loss": 0.042088061571121216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999763430857669, + "compression_loss": 0.0, + "distillation_loss": 0.05880480259656906, + "epoch": 7.51, + "learning_rate": 4.9369636300990645e-05, + "loss": 0.0785, + "step": 7904, + "task_loss": 0.2559299170970917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999764790447106, + "compression_loss": 0.0, + "distillation_loss": 0.12047819793224335, + "epoch": 7.51, + "learning_rate": 4.936725643162018e-05, + "loss": 0.1142, + "step": 7905, + "task_loss": 0.05795508623123169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999766144817395, + "compression_loss": 0.0, + "distillation_loss": 0.11895313113927841, + "epoch": 7.51, + "learning_rate": 4.936487213582023e-05, + "loss": 0.1289, + "step": 7906, + "task_loss": 0.2186514288187027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999767493978573, + "compression_loss": 0.0, + "distillation_loss": 0.08085181564092636, + "epoch": 7.51, + "learning_rate": 4.9362483414023905e-05, + "loss": 0.0756, + "step": 7907, + "task_loss": 0.028206422924995422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799976883794068, + "compression_loss": 0.0, + "distillation_loss": 0.03390750288963318, + "epoch": 7.51, + "learning_rate": 4.936009026666515e-05, + "loss": 0.0341, + "step": 7908, + "task_loss": 0.035543277859687805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999770176713749, + "compression_loss": 0.0, + "distillation_loss": 0.09985256940126419, + "epoch": 7.51, + "learning_rate": 4.935769269417867e-05, + "loss": 0.1132, + "step": 7909, + "task_loss": 0.23357577621936798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999771510307819, + "compression_loss": 0.0, + "distillation_loss": 0.14149194955825806, + "epoch": 7.51, + "learning_rate": 4.935529069700001e-05, + "loss": 0.1658, + "step": 7910, + "task_loss": 0.3845874071121216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999772838732926, + "compression_loss": 0.0, + "distillation_loss": 0.12010196596384048, + "epoch": 7.51, + "learning_rate": 4.935288427556549e-05, + "loss": 0.1175, + "step": 7911, + "task_loss": 0.0936579704284668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999774161999107, + "compression_loss": 0.0, + "distillation_loss": 0.15679499506950378, + "epoch": 7.51, + "learning_rate": 4.935047343031227e-05, + "loss": 0.1524, + "step": 7912, + "task_loss": 0.1123620867729187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999775480116399, + "compression_loss": 0.0, + "distillation_loss": 0.04103892296552658, + "epoch": 7.51, + "learning_rate": 4.934805816167827e-05, + "loss": 0.0391, + "step": 7913, + "task_loss": 0.021613851189613342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999776793094838, + "compression_loss": 0.0, + "distillation_loss": 0.0210304856300354, + "epoch": 7.52, + "learning_rate": 4.934563847010224e-05, + "loss": 0.0286, + "step": 7914, + "task_loss": 0.09712889790534973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999778100944462, + "compression_loss": 0.0, + "distillation_loss": 0.049790140241384506, + "epoch": 7.52, + "learning_rate": 4.934321435602374e-05, + "loss": 0.0514, + "step": 7915, + "task_loss": 0.06559078395366669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999779403675308, + "compression_loss": 0.0, + "distillation_loss": 0.025263594463467598, + "epoch": 7.52, + "learning_rate": 4.934078581988311e-05, + "loss": 0.0281, + "step": 7916, + "task_loss": 0.0537312813103199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999780701297412, + "compression_loss": 0.0, + "distillation_loss": 0.03194242715835571, + "epoch": 7.52, + "learning_rate": 4.933835286212151e-05, + "loss": 0.0295, + "step": 7917, + "task_loss": 0.007093427702784538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799978199382081, + "compression_loss": 0.0, + "distillation_loss": 0.056847527623176575, + "epoch": 7.52, + "learning_rate": 4.9335915483180896e-05, + "loss": 0.0558, + "step": 7918, + "task_loss": 0.046506118029356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799978328125554, + "compression_loss": 0.0, + "distillation_loss": 0.03372214734554291, + "epoch": 7.52, + "learning_rate": 4.9333473683504025e-05, + "loss": 0.0443, + "step": 7919, + "task_loss": 0.1395803838968277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999784563611639, + "compression_loss": 0.0, + "distillation_loss": 0.08080513775348663, + "epoch": 7.52, + "learning_rate": 4.9331027463534484e-05, + "loss": 0.0788, + "step": 7920, + "task_loss": 0.060925908386707306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999785840899144, + "compression_loss": 0.0, + "distillation_loss": 0.018568016588687897, + "epoch": 7.52, + "learning_rate": 4.932857682371661e-05, + "loss": 0.0259, + "step": 7921, + "task_loss": 0.09170715510845184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999787113128091, + "compression_loss": 0.0, + "distillation_loss": 0.03294629603624344, + "epoch": 7.52, + "learning_rate": 4.9326121764495596e-05, + "loss": 0.0304, + "step": 7922, + "task_loss": 0.007955122739076614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999788380308517, + "compression_loss": 0.0, + "distillation_loss": 0.07836222648620605, + "epoch": 7.52, + "learning_rate": 4.932366228631741e-05, + "loss": 0.0852, + "step": 7923, + "task_loss": 0.14636337757110596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999789642450458, + "compression_loss": 0.0, + "distillation_loss": 0.08306904882192612, + "epoch": 7.53, + "learning_rate": 4.932119838962882e-05, + "loss": 0.0848, + "step": 7924, + "task_loss": 0.10019460320472717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999790899563952, + "compression_loss": 0.0, + "distillation_loss": 0.02642223611474037, + "epoch": 7.53, + "learning_rate": 4.931873007487741e-05, + "loss": 0.0414, + "step": 7925, + "task_loss": 0.17620962858200073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999792151659036, + "compression_loss": 0.0, + "distillation_loss": 0.030220147222280502, + "epoch": 7.53, + "learning_rate": 4.9316257342511565e-05, + "loss": 0.0286, + "step": 7926, + "task_loss": 0.01419852301478386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999793398745746, + "compression_loss": 0.0, + "distillation_loss": 0.018758175894618034, + "epoch": 7.53, + "learning_rate": 4.9313780192980466e-05, + "loss": 0.0236, + "step": 7927, + "task_loss": 0.06724384427070618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799979464083412, + "compression_loss": 0.0, + "distillation_loss": 0.14385569095611572, + "epoch": 7.53, + "learning_rate": 4.9311298626734095e-05, + "loss": 0.1551, + "step": 7928, + "task_loss": 0.25620976090431213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999795877934193, + "compression_loss": 0.0, + "distillation_loss": 0.13976441323757172, + "epoch": 7.53, + "learning_rate": 4.9308812644223245e-05, + "loss": 0.1381, + "step": 7929, + "task_loss": 0.12334275990724564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999797110056004, + "compression_loss": 0.0, + "distillation_loss": 0.04338621348142624, + "epoch": 7.53, + "learning_rate": 4.9306322245899505e-05, + "loss": 0.0628, + "step": 7930, + "task_loss": 0.23742617666721344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999798337209587, + "compression_loss": 0.0, + "distillation_loss": 0.010634875856339931, + "epoch": 7.53, + "learning_rate": 4.930382743221528e-05, + "loss": 0.0198, + "step": 7931, + "task_loss": 0.1026746854186058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999799559404982, + "compression_loss": 0.0, + "distillation_loss": 0.028556659817695618, + "epoch": 7.53, + "learning_rate": 4.930132820362374e-05, + "loss": 0.0416, + "step": 7932, + "task_loss": 0.15901359915733337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999800776652223, + "compression_loss": 0.0, + "distillation_loss": 0.06336972862482071, + "epoch": 7.53, + "learning_rate": 4.9298824560578895e-05, + "loss": 0.0625, + "step": 7933, + "task_loss": 0.054341040551662445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999801988961349, + "compression_loss": 0.0, + "distillation_loss": 0.09229055047035217, + "epoch": 7.53, + "learning_rate": 4.929631650353555e-05, + "loss": 0.0899, + "step": 7934, + "task_loss": 0.06858476251363754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999803196342395, + "compression_loss": 0.0, + "distillation_loss": 0.10068748891353607, + "epoch": 7.54, + "learning_rate": 4.92938040329493e-05, + "loss": 0.0991, + "step": 7935, + "task_loss": 0.0846622884273529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79998043988054, + "compression_loss": 0.0, + "distillation_loss": 0.033520594239234924, + "epoch": 7.54, + "learning_rate": 4.9291287149276544e-05, + "loss": 0.0323, + "step": 7936, + "task_loss": 0.02134551852941513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999805596360399, + "compression_loss": 0.0, + "distillation_loss": 0.04189582169055939, + "epoch": 7.54, + "learning_rate": 4.928876585297448e-05, + "loss": 0.0512, + "step": 7937, + "task_loss": 0.13514596223831177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999806789017428, + "compression_loss": 0.0, + "distillation_loss": 0.04260486364364624, + "epoch": 7.54, + "learning_rate": 4.9286240144501136e-05, + "loss": 0.0483, + "step": 7938, + "task_loss": 0.09960085153579712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999807976786527, + "compression_loss": 0.0, + "distillation_loss": 0.08986024558544159, + "epoch": 7.54, + "learning_rate": 4.928371002431531e-05, + "loss": 0.0864, + "step": 7939, + "task_loss": 0.05546959117054939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999809159677731, + "compression_loss": 0.0, + "distillation_loss": 0.05223695933818817, + "epoch": 7.54, + "learning_rate": 4.92811754928766e-05, + "loss": 0.0485, + "step": 7940, + "task_loss": 0.014858538284897804 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999810337701077, + "compression_loss": 0.0, + "distillation_loss": 0.022646090015769005, + "epoch": 7.54, + "learning_rate": 4.927863655064542e-05, + "loss": 0.0294, + "step": 7941, + "task_loss": 0.0898606926202774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79998115108666, + "compression_loss": 0.0, + "distillation_loss": 0.12153077125549316, + "epoch": 7.54, + "learning_rate": 4.9276093198082986e-05, + "loss": 0.1213, + "step": 7942, + "task_loss": 0.11907624453306198 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799981267918434, + "compression_loss": 0.0, + "distillation_loss": 0.10180039703845978, + "epoch": 7.54, + "learning_rate": 4.92735454356513e-05, + "loss": 0.1072, + "step": 7943, + "task_loss": 0.15552875399589539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999813842664332, + "compression_loss": 0.0, + "distillation_loss": 0.04333251342177391, + "epoch": 7.54, + "learning_rate": 4.927099326381319e-05, + "loss": 0.0482, + "step": 7944, + "task_loss": 0.09174293279647827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999815001316612, + "compression_loss": 0.0, + "distillation_loss": 0.06807390600442886, + "epoch": 7.55, + "learning_rate": 4.926843668303227e-05, + "loss": 0.0658, + "step": 7945, + "task_loss": 0.045711807906627655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799981615515122, + "compression_loss": 0.0, + "distillation_loss": 0.0737372487783432, + "epoch": 7.55, + "learning_rate": 4.926587569377293e-05, + "loss": 0.0728, + "step": 7946, + "task_loss": 0.06481769680976868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799981730417819, + "compression_loss": 0.0, + "distillation_loss": 0.028609251603484154, + "epoch": 7.55, + "learning_rate": 4.926331029650042e-05, + "loss": 0.0288, + "step": 7947, + "task_loss": 0.03084813803434372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999818448407559, + "compression_loss": 0.0, + "distillation_loss": 0.10599102079868317, + "epoch": 7.55, + "learning_rate": 4.926074049168074e-05, + "loss": 0.1118, + "step": 7948, + "task_loss": 0.16414104402065277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999819587849365, + "compression_loss": 0.0, + "distillation_loss": 0.04272402077913284, + "epoch": 7.55, + "learning_rate": 4.9258166279780704e-05, + "loss": 0.0482, + "step": 7949, + "task_loss": 0.0972161665558815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999820722513644, + "compression_loss": 0.0, + "distillation_loss": 0.09052185714244843, + "epoch": 7.55, + "learning_rate": 4.925558766126794e-05, + "loss": 0.0844, + "step": 7950, + "task_loss": 0.029212266206741333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999821852410433, + "compression_loss": 0.0, + "distillation_loss": 0.031549785286188126, + "epoch": 7.55, + "learning_rate": 4.9253004636610856e-05, + "loss": 0.0294, + "step": 7951, + "task_loss": 0.010255459696054459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999822977549769, + "compression_loss": 0.0, + "distillation_loss": 0.02592216432094574, + "epoch": 7.55, + "learning_rate": 4.925041720627868e-05, + "loss": 0.0242, + "step": 7952, + "task_loss": 0.008722037076950073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999824097941688, + "compression_loss": 0.0, + "distillation_loss": 0.051010869443416595, + "epoch": 7.55, + "learning_rate": 4.9247825370741416e-05, + "loss": 0.0672, + "step": 7953, + "task_loss": 0.2124231904745102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999825213596229, + "compression_loss": 0.0, + "distillation_loss": 0.04993243142962456, + "epoch": 7.55, + "learning_rate": 4.924522913046991e-05, + "loss": 0.0522, + "step": 7954, + "task_loss": 0.072673000395298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999826324523426, + "compression_loss": 0.0, + "distillation_loss": 0.024953220039606094, + "epoch": 7.55, + "learning_rate": 4.924262848593576e-05, + "loss": 0.0243, + "step": 7955, + "task_loss": 0.018649937584996223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999827430733317, + "compression_loss": 0.0, + "distillation_loss": 0.07212147861719131, + "epoch": 7.56, + "learning_rate": 4.924002343761139e-05, + "loss": 0.0838, + "step": 7956, + "task_loss": 0.18851712346076965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799982853223594, + "compression_loss": 0.0, + "distillation_loss": 0.012818554416298866, + "epoch": 7.56, + "learning_rate": 4.923741398597002e-05, + "loss": 0.0167, + "step": 7957, + "task_loss": 0.051808781921863556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999829629041331, + "compression_loss": 0.0, + "distillation_loss": 0.12450156360864639, + "epoch": 7.56, + "learning_rate": 4.9234800131485675e-05, + "loss": 0.1184, + "step": 7958, + "task_loss": 0.06334085762500763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999830721159525, + "compression_loss": 0.0, + "distillation_loss": 0.0640573650598526, + "epoch": 7.56, + "learning_rate": 4.9232181874633164e-05, + "loss": 0.0677, + "step": 7959, + "task_loss": 0.10021346807479858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999831808600562, + "compression_loss": 0.0, + "distillation_loss": 0.038610100746154785, + "epoch": 7.56, + "learning_rate": 4.922955921588812e-05, + "loss": 0.0405, + "step": 7960, + "task_loss": 0.05722128972411156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999832891374477, + "compression_loss": 0.0, + "distillation_loss": 0.07282688468694687, + "epoch": 7.56, + "learning_rate": 4.922693215572695e-05, + "loss": 0.0788, + "step": 7961, + "task_loss": 0.13281500339508057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999833969491307, + "compression_loss": 0.0, + "distillation_loss": 0.04970156401395798, + "epoch": 7.56, + "learning_rate": 4.922430069462688e-05, + "loss": 0.0521, + "step": 7962, + "task_loss": 0.07329516112804413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999835042961089, + "compression_loss": 0.0, + "distillation_loss": 0.02577708661556244, + "epoch": 7.56, + "learning_rate": 4.9221664833065914e-05, + "loss": 0.025, + "step": 7963, + "task_loss": 0.017895691096782684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999836111793859, + "compression_loss": 0.0, + "distillation_loss": 0.027007538825273514, + "epoch": 7.56, + "learning_rate": 4.921902457152289e-05, + "loss": 0.0247, + "step": 7964, + "task_loss": 0.003965174779295921 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999837175999654, + "compression_loss": 0.0, + "distillation_loss": 0.019705362617969513, + "epoch": 7.56, + "learning_rate": 4.9216379910477403e-05, + "loss": 0.0182, + "step": 7965, + "task_loss": 0.004658494144678116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999838235588513, + "compression_loss": 0.0, + "distillation_loss": 0.033040136098861694, + "epoch": 7.57, + "learning_rate": 4.921373085040988e-05, + "loss": 0.0342, + "step": 7966, + "task_loss": 0.04470521956682205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999839290570471, + "compression_loss": 0.0, + "distillation_loss": 0.023144233971834183, + "epoch": 7.57, + "learning_rate": 4.921107739180153e-05, + "loss": 0.0267, + "step": 7967, + "task_loss": 0.05902532488107681 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999840340955565, + "compression_loss": 0.0, + "distillation_loss": 0.017418548464775085, + "epoch": 7.57, + "learning_rate": 4.9208419535134376e-05, + "loss": 0.0189, + "step": 7968, + "task_loss": 0.03223051875829697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999841386753831, + "compression_loss": 0.0, + "distillation_loss": 0.2720472514629364, + "epoch": 7.57, + "learning_rate": 4.920575728089122e-05, + "loss": 0.2749, + "step": 7969, + "task_loss": 0.3010197579860687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999842427975308, + "compression_loss": 0.0, + "distillation_loss": 0.052931733429431915, + "epoch": 7.57, + "learning_rate": 4.920309062955568e-05, + "loss": 0.0564, + "step": 7970, + "task_loss": 0.0876016914844513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799984346463003, + "compression_loss": 0.0, + "distillation_loss": 0.03811418265104294, + "epoch": 7.57, + "learning_rate": 4.920041958161217e-05, + "loss": 0.0421, + "step": 7971, + "task_loss": 0.0776088535785675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999844496728036, + "compression_loss": 0.0, + "distillation_loss": 0.033870723098516464, + "epoch": 7.57, + "learning_rate": 4.9197744137545884e-05, + "loss": 0.0475, + "step": 7972, + "task_loss": 0.1701067090034485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999845524279362, + "compression_loss": 0.0, + "distillation_loss": 0.02466290071606636, + "epoch": 7.57, + "learning_rate": 4.919506429784284e-05, + "loss": 0.0317, + "step": 7973, + "task_loss": 0.09482477605342865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999846547294045, + "compression_loss": 0.0, + "distillation_loss": 0.053808264434337616, + "epoch": 7.57, + "learning_rate": 4.919238006298984e-05, + "loss": 0.0581, + "step": 7974, + "task_loss": 0.0962759405374527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999847565782121, + "compression_loss": 0.0, + "distillation_loss": 0.051727067679166794, + "epoch": 7.57, + "learning_rate": 4.9189691433474494e-05, + "loss": 0.064, + "step": 7975, + "task_loss": 0.17400768399238586 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999848579753629, + "compression_loss": 0.0, + "distillation_loss": 0.029435984790325165, + "epoch": 7.57, + "learning_rate": 4.91869984097852e-05, + "loss": 0.0384, + "step": 7976, + "task_loss": 0.11923931539058685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999849589218603, + "compression_loss": 0.0, + "distillation_loss": 0.030128782615065575, + "epoch": 7.58, + "learning_rate": 4.918430099241116e-05, + "loss": 0.036, + "step": 7977, + "task_loss": 0.08850212395191193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999850594187082, + "compression_loss": 0.0, + "distillation_loss": 0.03268412873148918, + "epoch": 7.58, + "learning_rate": 4.918159918184236e-05, + "loss": 0.0301, + "step": 7978, + "task_loss": 0.006716296076774597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999851594669102, + "compression_loss": 0.0, + "distillation_loss": 0.03931747376918793, + "epoch": 7.58, + "learning_rate": 4.9178892978569625e-05, + "loss": 0.0459, + "step": 7979, + "task_loss": 0.10497306287288666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79998525906747, + "compression_loss": 0.0, + "distillation_loss": 0.015478894114494324, + "epoch": 7.58, + "learning_rate": 4.9176182383084524e-05, + "loss": 0.0297, + "step": 7980, + "task_loss": 0.15772272646427155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999853582213913, + "compression_loss": 0.0, + "distillation_loss": 0.09228493273258209, + "epoch": 7.58, + "learning_rate": 4.917346739587946e-05, + "loss": 0.0927, + "step": 7981, + "task_loss": 0.09685853123664856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999854569296777, + "compression_loss": 0.0, + "distillation_loss": 0.01883935183286667, + "epoch": 7.58, + "learning_rate": 4.917074801744763e-05, + "loss": 0.0178, + "step": 7982, + "task_loss": 0.00816972553730011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999855551933329, + "compression_loss": 0.0, + "distillation_loss": 0.021920818835496902, + "epoch": 7.58, + "learning_rate": 4.916802424828301e-05, + "loss": 0.0203, + "step": 7983, + "task_loss": 0.0058016423135995865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999856530133607, + "compression_loss": 0.0, + "distillation_loss": 0.02121850848197937, + "epoch": 7.58, + "learning_rate": 4.9165296088880384e-05, + "loss": 0.0198, + "step": 7984, + "task_loss": 0.0068349651992321014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999857503907646, + "compression_loss": 0.0, + "distillation_loss": 0.11084365099668503, + "epoch": 7.58, + "learning_rate": 4.916256353973535e-05, + "loss": 0.1116, + "step": 7985, + "task_loss": 0.11807304620742798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999858473265484, + "compression_loss": 0.0, + "distillation_loss": 0.031100111082196236, + "epoch": 7.58, + "learning_rate": 4.9159826601344286e-05, + "loss": 0.0335, + "step": 7986, + "task_loss": 0.05468904972076416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999859438217158, + "compression_loss": 0.0, + "distillation_loss": 0.058571815490722656, + "epoch": 7.58, + "learning_rate": 4.915708527420435e-05, + "loss": 0.065, + "step": 7987, + "task_loss": 0.1225350946187973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999860398772705, + "compression_loss": 0.0, + "distillation_loss": 0.033145397901535034, + "epoch": 7.59, + "learning_rate": 4.9154339558813546e-05, + "loss": 0.0307, + "step": 7988, + "task_loss": 0.008651839569211006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999861354942159, + "compression_loss": 0.0, + "distillation_loss": 0.11876051872968674, + "epoch": 7.59, + "learning_rate": 4.915158945567062e-05, + "loss": 0.1123, + "step": 7989, + "task_loss": 0.05439896881580353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999862306735561, + "compression_loss": 0.0, + "distillation_loss": 0.1755588948726654, + "epoch": 7.59, + "learning_rate": 4.914883496527516e-05, + "loss": 0.1739, + "step": 7990, + "task_loss": 0.15885521471500397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999863254162946, + "compression_loss": 0.0, + "distillation_loss": 0.06931862980127335, + "epoch": 7.59, + "learning_rate": 4.914607608812753e-05, + "loss": 0.0661, + "step": 7991, + "task_loss": 0.03663593530654907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799986419723435, + "compression_loss": 0.0, + "distillation_loss": 0.0520208366215229, + "epoch": 7.59, + "learning_rate": 4.9143312824728896e-05, + "loss": 0.0687, + "step": 7992, + "task_loss": 0.2192659229040146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799986513595981, + "compression_loss": 0.0, + "distillation_loss": 0.045383553951978683, + "epoch": 7.59, + "learning_rate": 4.91405451755812e-05, + "loss": 0.0435, + "step": 7993, + "task_loss": 0.026806168258190155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999866070349365, + "compression_loss": 0.0, + "distillation_loss": 0.013009166345000267, + "epoch": 7.59, + "learning_rate": 4.913777314118721e-05, + "loss": 0.0176, + "step": 7994, + "task_loss": 0.05919176712632179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999867000413049, + "compression_loss": 0.0, + "distillation_loss": 0.0388064906001091, + "epoch": 7.59, + "learning_rate": 4.9134996722050483e-05, + "loss": 0.0513, + "step": 7995, + "task_loss": 0.16394969820976257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999867926160901, + "compression_loss": 0.0, + "distillation_loss": 0.018643934279680252, + "epoch": 7.59, + "learning_rate": 4.913221591867537e-05, + "loss": 0.0346, + "step": 7996, + "task_loss": 0.17829307913780212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999868847602956, + "compression_loss": 0.0, + "distillation_loss": 0.013243372552096844, + "epoch": 7.59, + "learning_rate": 4.912943073156701e-05, + "loss": 0.0122, + "step": 7997, + "task_loss": 0.0027358736842870712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999869764749251, + "compression_loss": 0.0, + "distillation_loss": 0.036573659628629684, + "epoch": 7.6, + "learning_rate": 4.912664116123134e-05, + "loss": 0.0333, + "step": 7998, + "task_loss": 0.003510754555463791 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999870677609825, + "compression_loss": 0.0, + "distillation_loss": 0.03176216036081314, + "epoch": 7.6, + "learning_rate": 4.9123847208175126e-05, + "loss": 0.0383, + "step": 7999, + "task_loss": 0.09740820527076721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999871586194712, + "compression_loss": 0.0, + "distillation_loss": 0.013369059190154076, + "epoch": 7.6, + "learning_rate": 4.912104887290587e-05, + "loss": 0.0123, + "step": 8000, + "task_loss": 0.0031455066055059433 + }, + { + "epoch": 7.6, + "eval_accuracy": 0.8784403669724771, + "eval_loss": 0.49849244952201843, + "eval_runtime": 18.0456, + "eval_samples_per_second": 48.322, + "eval_steps_per_second": 6.04, + "step": 8000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999872490513951, + "compression_loss": 0.0, + "distillation_loss": 0.10992217063903809, + "epoch": 7.6, + "learning_rate": 4.911824615593193e-05, + "loss": 0.1131, + "step": 8001, + "task_loss": 0.1418878138065338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999873390577578, + "compression_loss": 0.0, + "distillation_loss": 0.042782656848430634, + "epoch": 7.6, + "learning_rate": 4.9115439057762416e-05, + "loss": 0.0481, + "step": 8002, + "task_loss": 0.09572674334049225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999874286395628, + "compression_loss": 0.0, + "distillation_loss": 0.03839121758937836, + "epoch": 7.6, + "learning_rate": 4.911262757890726e-05, + "loss": 0.0471, + "step": 8003, + "task_loss": 0.12578138709068298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999875177978142, + "compression_loss": 0.0, + "distillation_loss": 0.01663174107670784, + "epoch": 7.6, + "learning_rate": 4.9109811719877166e-05, + "loss": 0.024, + "step": 8004, + "task_loss": 0.09052921831607819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999876065335153, + "compression_loss": 0.0, + "distillation_loss": 0.09784665703773499, + "epoch": 7.6, + "learning_rate": 4.910699148118367e-05, + "loss": 0.0997, + "step": 8005, + "task_loss": 0.1160675436258316 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79998769484767, + "compression_loss": 0.0, + "distillation_loss": 0.027502398937940598, + "epoch": 7.6, + "learning_rate": 4.910416686333906e-05, + "loss": 0.0349, + "step": 8006, + "task_loss": 0.10173024237155914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999877827412818, + "compression_loss": 0.0, + "distillation_loss": 0.03942710533738136, + "epoch": 7.6, + "learning_rate": 4.910133786685646e-05, + "loss": 0.0418, + "step": 8007, + "task_loss": 0.06354475021362305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999878702153546, + "compression_loss": 0.0, + "distillation_loss": 0.016465935856103897, + "epoch": 7.6, + "learning_rate": 4.9098504492249764e-05, + "loss": 0.0239, + "step": 8008, + "task_loss": 0.09078645706176758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999879572708918, + "compression_loss": 0.0, + "distillation_loss": 0.020133644342422485, + "epoch": 7.61, + "learning_rate": 4.9095666740033664e-05, + "loss": 0.0187, + "step": 8009, + "task_loss": 0.006201488897204399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999880439088973, + "compression_loss": 0.0, + "distillation_loss": 0.04203331097960472, + "epoch": 7.61, + "learning_rate": 4.9092824610723655e-05, + "loss": 0.0508, + "step": 8010, + "task_loss": 0.12994877994060516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999881301303748, + "compression_loss": 0.0, + "distillation_loss": 0.04908212646842003, + "epoch": 7.61, + "learning_rate": 4.908997810483602e-05, + "loss": 0.0532, + "step": 8011, + "task_loss": 0.09064097702503204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799988215936328, + "compression_loss": 0.0, + "distillation_loss": 0.0626966580748558, + "epoch": 7.61, + "learning_rate": 4.908712722288785e-05, + "loss": 0.088, + "step": 8012, + "task_loss": 0.316189706325531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999883013277603, + "compression_loss": 0.0, + "distillation_loss": 0.058064427226781845, + "epoch": 7.61, + "learning_rate": 4.9084271965397014e-05, + "loss": 0.0626, + "step": 8013, + "task_loss": 0.1036590188741684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999883863056757, + "compression_loss": 0.0, + "distillation_loss": 0.04076586291193962, + "epoch": 7.61, + "learning_rate": 4.908141233288218e-05, + "loss": 0.0374, + "step": 8014, + "task_loss": 0.00735941156744957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999884708710777, + "compression_loss": 0.0, + "distillation_loss": 0.08777140825986862, + "epoch": 7.61, + "learning_rate": 4.907854832586282e-05, + "loss": 0.0976, + "step": 8015, + "task_loss": 0.18588636815547943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999885550249701, + "compression_loss": 0.0, + "distillation_loss": 0.038717057555913925, + "epoch": 7.61, + "learning_rate": 4.907567994485919e-05, + "loss": 0.041, + "step": 8016, + "task_loss": 0.06178871542215347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999886387683565, + "compression_loss": 0.0, + "distillation_loss": 0.14275038242340088, + "epoch": 7.61, + "learning_rate": 4.9072807190392354e-05, + "loss": 0.1415, + "step": 8017, + "task_loss": 0.13036131858825684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999887221022406, + "compression_loss": 0.0, + "distillation_loss": 0.024897240102291107, + "epoch": 7.61, + "learning_rate": 4.906993006298416e-05, + "loss": 0.0451, + "step": 8018, + "task_loss": 0.22650375962257385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999888050276261, + "compression_loss": 0.0, + "distillation_loss": 0.14283829927444458, + "epoch": 7.62, + "learning_rate": 4.9067048563157235e-05, + "loss": 0.1404, + "step": 8019, + "task_loss": 0.1185140609741211 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999888875455167, + "compression_loss": 0.0, + "distillation_loss": 0.08865389972925186, + "epoch": 7.62, + "learning_rate": 4.906416269143505e-05, + "loss": 0.0981, + "step": 8020, + "task_loss": 0.18301840126514435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799988969656916, + "compression_loss": 0.0, + "distillation_loss": 0.08406002819538116, + "epoch": 7.62, + "learning_rate": 4.90612724483418e-05, + "loss": 0.0969, + "step": 8021, + "task_loss": 0.21275919675827026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999890513628277, + "compression_loss": 0.0, + "distillation_loss": 0.08485761284828186, + "epoch": 7.62, + "learning_rate": 4.905837783440253e-05, + "loss": 0.0828, + "step": 8022, + "task_loss": 0.0644686371088028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999891326642555, + "compression_loss": 0.0, + "distillation_loss": 0.040136683732271194, + "epoch": 7.62, + "learning_rate": 4.905547885014307e-05, + "loss": 0.0373, + "step": 8023, + "task_loss": 0.01128750666975975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999892135622032, + "compression_loss": 0.0, + "distillation_loss": 0.014311270788311958, + "epoch": 7.62, + "learning_rate": 4.9052575496090016e-05, + "loss": 0.0133, + "step": 8024, + "task_loss": 0.004563674330711365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999892940576744, + "compression_loss": 0.0, + "distillation_loss": 0.03336448222398758, + "epoch": 7.62, + "learning_rate": 4.904966777277079e-05, + "loss": 0.0309, + "step": 8025, + "task_loss": 0.008388657122850418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999893741516727, + "compression_loss": 0.0, + "distillation_loss": 0.030747603625059128, + "epoch": 7.62, + "learning_rate": 4.9046755680713586e-05, + "loss": 0.033, + "step": 8026, + "task_loss": 0.05296764522790909 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999894538452018, + "compression_loss": 0.0, + "distillation_loss": 0.021251916885375977, + "epoch": 7.62, + "learning_rate": 4.90438392204474e-05, + "loss": 0.0355, + "step": 8027, + "task_loss": 0.16398029029369354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999895331392655, + "compression_loss": 0.0, + "distillation_loss": 0.21242570877075195, + "epoch": 7.62, + "learning_rate": 4.9040918392502026e-05, + "loss": 0.2064, + "step": 8028, + "task_loss": 0.15173274278640747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999896120348674, + "compression_loss": 0.0, + "distillation_loss": 0.020135043188929558, + "epoch": 7.62, + "learning_rate": 4.903799319740804e-05, + "loss": 0.0209, + "step": 8029, + "task_loss": 0.02775605395436287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999896905330112, + "compression_loss": 0.0, + "distillation_loss": 0.033395953476428986, + "epoch": 7.63, + "learning_rate": 4.903506363569683e-05, + "loss": 0.0337, + "step": 8030, + "task_loss": 0.036266930401325226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999897686347005, + "compression_loss": 0.0, + "distillation_loss": 0.02742244489490986, + "epoch": 7.63, + "learning_rate": 4.9032129707900556e-05, + "loss": 0.0256, + "step": 8031, + "task_loss": 0.008756054565310478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999898463409392, + "compression_loss": 0.0, + "distillation_loss": 0.023902807384729385, + "epoch": 7.63, + "learning_rate": 4.9029191414552165e-05, + "loss": 0.0294, + "step": 8032, + "task_loss": 0.07919642329216003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999899236527307, + "compression_loss": 0.0, + "distillation_loss": 0.03757765516638756, + "epoch": 7.63, + "learning_rate": 4.9026248756185445e-05, + "loss": 0.0356, + "step": 8033, + "task_loss": 0.017600098624825478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999900005710788, + "compression_loss": 0.0, + "distillation_loss": 0.08396127820014954, + "epoch": 7.63, + "learning_rate": 4.902330173333492e-05, + "loss": 0.0825, + "step": 8034, + "task_loss": 0.06926584243774414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999900770969873, + "compression_loss": 0.0, + "distillation_loss": 0.03239300101995468, + "epoch": 7.63, + "learning_rate": 4.9020350346535936e-05, + "loss": 0.0354, + "step": 8035, + "task_loss": 0.062150366604328156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999901532314597, + "compression_loss": 0.0, + "distillation_loss": 0.0414559543132782, + "epoch": 7.63, + "learning_rate": 4.901739459632463e-05, + "loss": 0.0448, + "step": 8036, + "task_loss": 0.07490754127502441 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999902289754999, + "compression_loss": 0.0, + "distillation_loss": 0.03927509859204292, + "epoch": 7.63, + "learning_rate": 4.901443448323792e-05, + "loss": 0.0396, + "step": 8037, + "task_loss": 0.04217911139130592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999903043301113, + "compression_loss": 0.0, + "distillation_loss": 0.10875845700502396, + "epoch": 7.63, + "learning_rate": 4.901147000781355e-05, + "loss": 0.1031, + "step": 8038, + "task_loss": 0.05257695913314819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999903792962978, + "compression_loss": 0.0, + "distillation_loss": 0.18646162748336792, + "epoch": 7.63, + "learning_rate": 4.9008501170589996e-05, + "loss": 0.1765, + "step": 8039, + "task_loss": 0.08664773404598236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799990453875063, + "compression_loss": 0.0, + "distillation_loss": 0.13723516464233398, + "epoch": 7.64, + "learning_rate": 4.900552797210658e-05, + "loss": 0.1398, + "step": 8040, + "task_loss": 0.16254064440727234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999905280674106, + "compression_loss": 0.0, + "distillation_loss": 0.05691935867071152, + "epoch": 7.64, + "learning_rate": 4.90025504129034e-05, + "loss": 0.0737, + "step": 8041, + "task_loss": 0.22483864426612854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999906018743442, + "compression_loss": 0.0, + "distillation_loss": 0.017723996192216873, + "epoch": 7.64, + "learning_rate": 4.8999568493521345e-05, + "loss": 0.0164, + "step": 8042, + "task_loss": 0.004084032028913498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999906752968677, + "compression_loss": 0.0, + "distillation_loss": 0.07552896440029144, + "epoch": 7.64, + "learning_rate": 4.899658221450208e-05, + "loss": 0.0805, + "step": 8043, + "task_loss": 0.1257321536540985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999907483359846, + "compression_loss": 0.0, + "distillation_loss": 0.10802210122346878, + "epoch": 7.64, + "learning_rate": 4.899359157638809e-05, + "loss": 0.1099, + "step": 8044, + "task_loss": 0.12680913507938385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999908209926986, + "compression_loss": 0.0, + "distillation_loss": 0.12618671357631683, + "epoch": 7.64, + "learning_rate": 4.899059657972264e-05, + "loss": 0.1189, + "step": 8045, + "task_loss": 0.0529848150908947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999908932680134, + "compression_loss": 0.0, + "distillation_loss": 0.027106711640954018, + "epoch": 7.64, + "learning_rate": 4.898759722504977e-05, + "loss": 0.0254, + "step": 8046, + "task_loss": 0.010013708844780922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999909651629327, + "compression_loss": 0.0, + "distillation_loss": 0.02689509466290474, + "epoch": 7.64, + "learning_rate": 4.8984593512914356e-05, + "loss": 0.0332, + "step": 8047, + "task_loss": 0.08948872238397598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999910366784602, + "compression_loss": 0.0, + "distillation_loss": 0.19355759024620056, + "epoch": 7.64, + "learning_rate": 4.898158544386201e-05, + "loss": 0.1822, + "step": 8048, + "task_loss": 0.08036290854215622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999911078155995, + "compression_loss": 0.0, + "distillation_loss": 0.029629534110426903, + "epoch": 7.64, + "learning_rate": 4.897857301843917e-05, + "loss": 0.0298, + "step": 8049, + "task_loss": 0.030841048806905746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999911785753543, + "compression_loss": 0.0, + "distillation_loss": 0.12482485175132751, + "epoch": 7.64, + "learning_rate": 4.897555623719306e-05, + "loss": 0.129, + "step": 8050, + "task_loss": 0.1669907122850418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999912489587283, + "compression_loss": 0.0, + "distillation_loss": 0.11259239912033081, + "epoch": 7.65, + "learning_rate": 4.897253510067169e-05, + "loss": 0.1228, + "step": 8051, + "task_loss": 0.21496890485286713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999913189667253, + "compression_loss": 0.0, + "distillation_loss": 0.10157469660043716, + "epoch": 7.65, + "learning_rate": 4.896950960942387e-05, + "loss": 0.1013, + "step": 8052, + "task_loss": 0.09880008548498154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999913886003488, + "compression_loss": 0.0, + "distillation_loss": 0.06962237507104874, + "epoch": 7.65, + "learning_rate": 4.896647976399919e-05, + "loss": 0.0682, + "step": 8053, + "task_loss": 0.05498020350933075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999914578606026, + "compression_loss": 0.0, + "distillation_loss": 0.02622218243777752, + "epoch": 7.65, + "learning_rate": 4.896344556494804e-05, + "loss": 0.0248, + "step": 8054, + "task_loss": 0.011739548295736313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999915267484903, + "compression_loss": 0.0, + "distillation_loss": 0.05761996656656265, + "epoch": 7.65, + "learning_rate": 4.8960407012821584e-05, + "loss": 0.0619, + "step": 8055, + "task_loss": 0.10011887550354004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999915952650157, + "compression_loss": 0.0, + "distillation_loss": 0.028014160692691803, + "epoch": 7.65, + "learning_rate": 4.895736410817181e-05, + "loss": 0.026, + "step": 8056, + "task_loss": 0.007695335894823074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999916634111823, + "compression_loss": 0.0, + "distillation_loss": 0.018892180174589157, + "epoch": 7.65, + "learning_rate": 4.8954316851551465e-05, + "loss": 0.0177, + "step": 8057, + "task_loss": 0.006759140640497208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799991731187994, + "compression_loss": 0.0, + "distillation_loss": 0.13491389155387878, + "epoch": 7.65, + "learning_rate": 4.895126524351409e-05, + "loss": 0.1503, + "step": 8058, + "task_loss": 0.2890867292881012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999917985964543, + "compression_loss": 0.0, + "distillation_loss": 0.04141715168952942, + "epoch": 7.65, + "learning_rate": 4.8948209284614046e-05, + "loss": 0.039, + "step": 8059, + "task_loss": 0.01771230250597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999918656375671, + "compression_loss": 0.0, + "distillation_loss": 0.016739333048462868, + "epoch": 7.65, + "learning_rate": 4.894514897540643e-05, + "loss": 0.0158, + "step": 8060, + "task_loss": 0.007118521258234978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999919323123358, + "compression_loss": 0.0, + "distillation_loss": 0.01123537216335535, + "epoch": 7.66, + "learning_rate": 4.89420843164472e-05, + "loss": 0.0202, + "step": 8061, + "task_loss": 0.10039521753787994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999919986217642, + "compression_loss": 0.0, + "distillation_loss": 0.019477343186736107, + "epoch": 7.66, + "learning_rate": 4.893901530829304e-05, + "loss": 0.0179, + "step": 8062, + "task_loss": 0.0040080491453409195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799992064566856, + "compression_loss": 0.0, + "distillation_loss": 0.04232557862997055, + "epoch": 7.66, + "learning_rate": 4.8935941951501463e-05, + "loss": 0.0474, + "step": 8063, + "task_loss": 0.0932355746626854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999921301486149, + "compression_loss": 0.0, + "distillation_loss": 0.10348470509052277, + "epoch": 7.66, + "learning_rate": 4.893286424663075e-05, + "loss": 0.0977, + "step": 8064, + "task_loss": 0.045336902141571045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999921953680447, + "compression_loss": 0.0, + "distillation_loss": 0.01743854209780693, + "epoch": 7.66, + "learning_rate": 4.892978219423998e-05, + "loss": 0.0331, + "step": 8065, + "task_loss": 0.17381621897220612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999922602261488, + "compression_loss": 0.0, + "distillation_loss": 0.01273958757519722, + "epoch": 7.66, + "learning_rate": 4.892669579488903e-05, + "loss": 0.0186, + "step": 8066, + "task_loss": 0.07183681428432465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799992324723931, + "compression_loss": 0.0, + "distillation_loss": 0.11235454678535461, + "epoch": 7.66, + "learning_rate": 4.892360504913856e-05, + "loss": 0.1105, + "step": 8067, + "task_loss": 0.09365284442901611 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999923888623951, + "compression_loss": 0.0, + "distillation_loss": 0.05907963961362839, + "epoch": 7.66, + "learning_rate": 4.8920509957550016e-05, + "loss": 0.054, + "step": 8068, + "task_loss": 0.008163506165146828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999924526425447, + "compression_loss": 0.0, + "distillation_loss": 0.023158248513936996, + "epoch": 7.66, + "learning_rate": 4.8917410520685635e-05, + "loss": 0.0351, + "step": 8069, + "task_loss": 0.14244556427001953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999925160653834, + "compression_loss": 0.0, + "distillation_loss": 0.016128631308674812, + "epoch": 7.66, + "learning_rate": 4.891430673910844e-05, + "loss": 0.015, + "step": 8070, + "task_loss": 0.0045019593089818954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799992579131915, + "compression_loss": 0.0, + "distillation_loss": 0.023808129131793976, + "epoch": 7.66, + "learning_rate": 4.891119861338226e-05, + "loss": 0.022, + "step": 8071, + "task_loss": 0.005267852917313576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999926418431431, + "compression_loss": 0.0, + "distillation_loss": 0.02601657807826996, + "epoch": 7.67, + "learning_rate": 4.8908086144071694e-05, + "loss": 0.0241, + "step": 8072, + "task_loss": 0.0071242451667785645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999927042000715, + "compression_loss": 0.0, + "distillation_loss": 0.045584745705127716, + "epoch": 7.67, + "learning_rate": 4.8904969331742136e-05, + "loss": 0.0496, + "step": 8073, + "task_loss": 0.08568625897169113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999927662037037, + "compression_loss": 0.0, + "distillation_loss": 0.021556910127401352, + "epoch": 7.67, + "learning_rate": 4.890184817695976e-05, + "loss": 0.0281, + "step": 8074, + "task_loss": 0.08685538172721863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999928278550436, + "compression_loss": 0.0, + "distillation_loss": 0.09852443635463715, + "epoch": 7.67, + "learning_rate": 4.8898722680291564e-05, + "loss": 0.1017, + "step": 8075, + "task_loss": 0.13034701347351074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999928891550947, + "compression_loss": 0.0, + "distillation_loss": 0.034188371151685715, + "epoch": 7.67, + "learning_rate": 4.8895592842305295e-05, + "loss": 0.0401, + "step": 8076, + "task_loss": 0.09356120228767395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999929501048607, + "compression_loss": 0.0, + "distillation_loss": 0.02725556679069996, + "epoch": 7.67, + "learning_rate": 4.88924586635695e-05, + "loss": 0.0348, + "step": 8077, + "task_loss": 0.1023518443107605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999930107053455, + "compression_loss": 0.0, + "distillation_loss": 0.0407828688621521, + "epoch": 7.67, + "learning_rate": 4.888932014465352e-05, + "loss": 0.0427, + "step": 8078, + "task_loss": 0.060073304921388626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999930709575525, + "compression_loss": 0.0, + "distillation_loss": 0.02005520462989807, + "epoch": 7.67, + "learning_rate": 4.888617728612749e-05, + "loss": 0.0187, + "step": 8079, + "task_loss": 0.006268629804253578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999931308624855, + "compression_loss": 0.0, + "distillation_loss": 0.016719898208975792, + "epoch": 7.67, + "learning_rate": 4.888303008856231e-05, + "loss": 0.0154, + "step": 8080, + "task_loss": 0.0033553019165992737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999931904211481, + "compression_loss": 0.0, + "distillation_loss": 0.01692567765712738, + "epoch": 7.67, + "learning_rate": 4.88798785525297e-05, + "loss": 0.0213, + "step": 8081, + "task_loss": 0.060581427067518234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999932496345442, + "compression_loss": 0.0, + "distillation_loss": 0.03452851623296738, + "epoch": 7.68, + "learning_rate": 4.887672267860214e-05, + "loss": 0.0371, + "step": 8082, + "task_loss": 0.059820882976055145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999933085036772, + "compression_loss": 0.0, + "distillation_loss": 0.022003114223480225, + "epoch": 7.68, + "learning_rate": 4.887356246735292e-05, + "loss": 0.0203, + "step": 8083, + "task_loss": 0.004899036139249802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799993367029551, + "compression_loss": 0.0, + "distillation_loss": 0.07817333936691284, + "epoch": 7.68, + "learning_rate": 4.8870397919356094e-05, + "loss": 0.0767, + "step": 8084, + "task_loss": 0.0634901374578476 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999934252131693, + "compression_loss": 0.0, + "distillation_loss": 0.08945944160223007, + "epoch": 7.68, + "learning_rate": 4.8867229035186526e-05, + "loss": 0.0865, + "step": 8085, + "task_loss": 0.060247667133808136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999934830555356, + "compression_loss": 0.0, + "distillation_loss": 0.016880009323358536, + "epoch": 7.68, + "learning_rate": 4.886405581541986e-05, + "loss": 0.0156, + "step": 8086, + "task_loss": 0.0044879671186208725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999935405576537, + "compression_loss": 0.0, + "distillation_loss": 0.02322390116751194, + "epoch": 7.68, + "learning_rate": 4.886087826063252e-05, + "loss": 0.0216, + "step": 8087, + "task_loss": 0.006913455203175545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999935977205271, + "compression_loss": 0.0, + "distillation_loss": 0.055455997586250305, + "epoch": 7.68, + "learning_rate": 4.8857696371401735e-05, + "loss": 0.0505, + "step": 8088, + "task_loss": 0.00570225715637207 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999936545451598, + "compression_loss": 0.0, + "distillation_loss": 0.28629836440086365, + "epoch": 7.68, + "learning_rate": 4.88545101483055e-05, + "loss": 0.2723, + "step": 8089, + "task_loss": 0.14600038528442383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999937110325553, + "compression_loss": 0.0, + "distillation_loss": 0.04520022124052048, + "epoch": 7.68, + "learning_rate": 4.885131959192262e-05, + "loss": 0.0451, + "step": 8090, + "task_loss": 0.0442010760307312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999937671837173, + "compression_loss": 0.0, + "distillation_loss": 0.0367523655295372, + "epoch": 7.68, + "learning_rate": 4.884812470283265e-05, + "loss": 0.0401, + "step": 8091, + "task_loss": 0.07056266069412231 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999938229996495, + "compression_loss": 0.0, + "distillation_loss": 0.02565024048089981, + "epoch": 7.68, + "learning_rate": 4.884492548161599e-05, + "loss": 0.0237, + "step": 8092, + "task_loss": 0.005736216902732849 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999938784813555, + "compression_loss": 0.0, + "distillation_loss": 0.14084674417972565, + "epoch": 7.69, + "learning_rate": 4.8841721928853776e-05, + "loss": 0.142, + "step": 8093, + "task_loss": 0.15224382281303406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999939336298391, + "compression_loss": 0.0, + "distillation_loss": 0.04901870712637901, + "epoch": 7.69, + "learning_rate": 4.8838514045127945e-05, + "loss": 0.0518, + "step": 8094, + "task_loss": 0.07676392048597336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999939884461038, + "compression_loss": 0.0, + "distillation_loss": 0.07762657105922699, + "epoch": 7.69, + "learning_rate": 4.883530183102123e-05, + "loss": 0.0828, + "step": 8095, + "task_loss": 0.1296282708644867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999940429311535, + "compression_loss": 0.0, + "distillation_loss": 0.060121599584817886, + "epoch": 7.69, + "learning_rate": 4.883208528711715e-05, + "loss": 0.0675, + "step": 8096, + "task_loss": 0.13351008296012878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999940970859918, + "compression_loss": 0.0, + "distillation_loss": 0.07687482237815857, + "epoch": 7.69, + "learning_rate": 4.8828864413999995e-05, + "loss": 0.0771, + "step": 8097, + "task_loss": 0.07926255464553833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999941509116224, + "compression_loss": 0.0, + "distillation_loss": 0.06490933150053024, + "epoch": 7.69, + "learning_rate": 4.8825639212254865e-05, + "loss": 0.0652, + "step": 8098, + "task_loss": 0.0676705464720726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999942044090489, + "compression_loss": 0.0, + "distillation_loss": 0.03710645064711571, + "epoch": 7.69, + "learning_rate": 4.882240968246762e-05, + "loss": 0.0375, + "step": 8099, + "task_loss": 0.04076388105750084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999942575792751, + "compression_loss": 0.0, + "distillation_loss": 0.02674899250268936, + "epoch": 7.69, + "learning_rate": 4.8819175825224925e-05, + "loss": 0.0249, + "step": 8100, + "task_loss": 0.0087125264108181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999943104233046, + "compression_loss": 0.0, + "distillation_loss": 0.06944914162158966, + "epoch": 7.69, + "learning_rate": 4.881593764111424e-05, + "loss": 0.0701, + "step": 8101, + "task_loss": 0.07624385505914688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799994362942141, + "compression_loss": 0.0, + "distillation_loss": 0.015938565135002136, + "epoch": 7.69, + "learning_rate": 4.8812695130723775e-05, + "loss": 0.0148, + "step": 8102, + "task_loss": 0.004970138892531395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999944151367882, + "compression_loss": 0.0, + "distillation_loss": 0.07653353363275528, + "epoch": 7.7, + "learning_rate": 4.880944829464256e-05, + "loss": 0.0844, + "step": 8103, + "task_loss": 0.155586376786232 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999944670082497, + "compression_loss": 0.0, + "distillation_loss": 0.05173022672533989, + "epoch": 7.7, + "learning_rate": 4.880619713346039e-05, + "loss": 0.0512, + "step": 8104, + "task_loss": 0.045994266867637634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999945185575293, + "compression_loss": 0.0, + "distillation_loss": 0.04804886877536774, + "epoch": 7.7, + "learning_rate": 4.8802941647767856e-05, + "loss": 0.0604, + "step": 8105, + "task_loss": 0.1717759370803833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999945697856307, + "compression_loss": 0.0, + "distillation_loss": 0.08852384239435196, + "epoch": 7.7, + "learning_rate": 4.879968183815634e-05, + "loss": 0.0908, + "step": 8106, + "task_loss": 0.11161627620458603 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999946206935573, + "compression_loss": 0.0, + "distillation_loss": 0.11721711605787277, + "epoch": 7.7, + "learning_rate": 4.8796417705217994e-05, + "loss": 0.1135, + "step": 8107, + "task_loss": 0.07980488240718842 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999946712823132, + "compression_loss": 0.0, + "distillation_loss": 0.14771784842014313, + "epoch": 7.7, + "learning_rate": 4.879314924954577e-05, + "loss": 0.1429, + "step": 8108, + "task_loss": 0.09997382760047913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999947215529017, + "compression_loss": 0.0, + "distillation_loss": 0.07733377814292908, + "epoch": 7.7, + "learning_rate": 4.87898764717334e-05, + "loss": 0.0812, + "step": 8109, + "task_loss": 0.1161360889673233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999947715063268, + "compression_loss": 0.0, + "distillation_loss": 0.06381769478321075, + "epoch": 7.7, + "learning_rate": 4.8786599372375384e-05, + "loss": 0.0612, + "step": 8110, + "task_loss": 0.03766363486647606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799994821143592, + "compression_loss": 0.0, + "distillation_loss": 0.16585132479667664, + "epoch": 7.7, + "learning_rate": 4.878331795206705e-05, + "loss": 0.1596, + "step": 8111, + "task_loss": 0.10337799787521362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799994870465701, + "compression_loss": 0.0, + "distillation_loss": 0.07819118350744247, + "epoch": 7.7, + "learning_rate": 4.878003221140446e-05, + "loss": 0.0886, + "step": 8112, + "task_loss": 0.18247146904468536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999949194736575, + "compression_loss": 0.0, + "distillation_loss": 0.014311755076050758, + "epoch": 7.7, + "learning_rate": 4.877674215098449e-05, + "loss": 0.0136, + "step": 8113, + "task_loss": 0.007542043924331665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999949681684653, + "compression_loss": 0.0, + "distillation_loss": 0.02407708764076233, + "epoch": 7.71, + "learning_rate": 4.87734477714048e-05, + "loss": 0.0321, + "step": 8114, + "task_loss": 0.10392209887504578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999950165511278, + "compression_loss": 0.0, + "distillation_loss": 0.0483178049325943, + "epoch": 7.71, + "learning_rate": 4.8770149073263833e-05, + "loss": 0.0544, + "step": 8115, + "task_loss": 0.10924448817968369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999950646226489, + "compression_loss": 0.0, + "distillation_loss": 0.06605200469493866, + "epoch": 7.71, + "learning_rate": 4.87668460571608e-05, + "loss": 0.0702, + "step": 8116, + "task_loss": 0.10719858109951019 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999951123840323, + "compression_loss": 0.0, + "distillation_loss": 0.12172948569059372, + "epoch": 7.71, + "learning_rate": 4.8763538723695726e-05, + "loss": 0.132, + "step": 8117, + "task_loss": 0.22487123310565948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999951598362816, + "compression_loss": 0.0, + "distillation_loss": 0.037446945905685425, + "epoch": 7.71, + "learning_rate": 4.87602270734694e-05, + "loss": 0.0366, + "step": 8118, + "task_loss": 0.028905699029564857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999952069804004, + "compression_loss": 0.0, + "distillation_loss": 0.02980622835457325, + "epoch": 7.71, + "learning_rate": 4.8756911107083387e-05, + "loss": 0.0321, + "step": 8119, + "task_loss": 0.05290570110082626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999952538173926, + "compression_loss": 0.0, + "distillation_loss": 0.02518196403980255, + "epoch": 7.71, + "learning_rate": 4.875359082514006e-05, + "loss": 0.0235, + "step": 8120, + "task_loss": 0.008489791303873062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999953003482617, + "compression_loss": 0.0, + "distillation_loss": 0.019510727375745773, + "epoch": 7.71, + "learning_rate": 4.8750266228242555e-05, + "loss": 0.0322, + "step": 8121, + "task_loss": 0.14628659188747406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999953465740115, + "compression_loss": 0.0, + "distillation_loss": 0.0406201109290123, + "epoch": 7.71, + "learning_rate": 4.874693731699481e-05, + "loss": 0.0407, + "step": 8122, + "task_loss": 0.04184962809085846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999953924956456, + "compression_loss": 0.0, + "distillation_loss": 0.03238911181688309, + "epoch": 7.71, + "learning_rate": 4.8743604092001544e-05, + "loss": 0.0395, + "step": 8123, + "task_loss": 0.10380247235298157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999954381141676, + "compression_loss": 0.0, + "distillation_loss": 0.08192041516304016, + "epoch": 7.72, + "learning_rate": 4.8740266553868236e-05, + "loss": 0.0867, + "step": 8124, + "task_loss": 0.12997817993164062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999954834305814, + "compression_loss": 0.0, + "distillation_loss": 0.040653470903635025, + "epoch": 7.72, + "learning_rate": 4.873692470320117e-05, + "loss": 0.0513, + "step": 8125, + "task_loss": 0.14687559008598328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999955284458905, + "compression_loss": 0.0, + "distillation_loss": 0.07809877395629883, + "epoch": 7.72, + "learning_rate": 4.8733578540607425e-05, + "loss": 0.0812, + "step": 8126, + "task_loss": 0.10933775454759598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999955731610988, + "compression_loss": 0.0, + "distillation_loss": 0.0384988859295845, + "epoch": 7.72, + "learning_rate": 4.8730228066694825e-05, + "loss": 0.0491, + "step": 8127, + "task_loss": 0.1440470814704895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999956175772097, + "compression_loss": 0.0, + "distillation_loss": 0.015554778277873993, + "epoch": 7.72, + "learning_rate": 4.872687328207202e-05, + "loss": 0.0146, + "step": 8128, + "task_loss": 0.006275909021496773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999956616952271, + "compression_loss": 0.0, + "distillation_loss": 0.013756733387708664, + "epoch": 7.72, + "learning_rate": 4.872351418734841e-05, + "loss": 0.0128, + "step": 8129, + "task_loss": 0.004298551008105278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999957055161545, + "compression_loss": 0.0, + "distillation_loss": 0.037815727293491364, + "epoch": 7.72, + "learning_rate": 4.8720150783134196e-05, + "loss": 0.0426, + "step": 8130, + "task_loss": 0.08522268384695053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999957490409958, + "compression_loss": 0.0, + "distillation_loss": 0.0777861624956131, + "epoch": 7.72, + "learning_rate": 4.871678307004035e-05, + "loss": 0.0729, + "step": 8131, + "task_loss": 0.0290694423019886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999957922707545, + "compression_loss": 0.0, + "distillation_loss": 0.021506596356630325, + "epoch": 7.72, + "learning_rate": 4.8713411048678635e-05, + "loss": 0.0198, + "step": 8132, + "task_loss": 0.004600740969181061 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999958352064344, + "compression_loss": 0.0, + "distillation_loss": 0.13485758006572723, + "epoch": 7.72, + "learning_rate": 4.8710034719661614e-05, + "loss": 0.1337, + "step": 8133, + "task_loss": 0.1231408417224884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799995877849039, + "compression_loss": 0.0, + "distillation_loss": 0.014092806726694107, + "epoch": 7.72, + "learning_rate": 4.870665408360258e-05, + "loss": 0.0216, + "step": 8134, + "task_loss": 0.08945970237255096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999959201995722, + "compression_loss": 0.0, + "distillation_loss": 0.04725116491317749, + "epoch": 7.73, + "learning_rate": 4.870326914111567e-05, + "loss": 0.044, + "step": 8135, + "task_loss": 0.014543771743774414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999959622590376, + "compression_loss": 0.0, + "distillation_loss": 0.09922022372484207, + "epoch": 7.73, + "learning_rate": 4.8699879892815756e-05, + "loss": 0.09, + "step": 8136, + "task_loss": 0.007207704707980156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999960040284388, + "compression_loss": 0.0, + "distillation_loss": 0.062455762177705765, + "epoch": 7.73, + "learning_rate": 4.8696486339318524e-05, + "loss": 0.0625, + "step": 8137, + "task_loss": 0.06264299154281616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999960455087797, + "compression_loss": 0.0, + "distillation_loss": 0.08884945511817932, + "epoch": 7.73, + "learning_rate": 4.8693088481240424e-05, + "loss": 0.0883, + "step": 8138, + "task_loss": 0.0832391083240509 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999960867010637, + "compression_loss": 0.0, + "distillation_loss": 0.03622208908200264, + "epoch": 7.73, + "learning_rate": 4.86896863191987e-05, + "loss": 0.0334, + "step": 8139, + "task_loss": 0.007611839100718498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999961276062947, + "compression_loss": 0.0, + "distillation_loss": 0.05895059183239937, + "epoch": 7.73, + "learning_rate": 4.8686279853811356e-05, + "loss": 0.0593, + "step": 8140, + "task_loss": 0.06226490065455437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999961682254764, + "compression_loss": 0.0, + "distillation_loss": 0.03609544038772583, + "epoch": 7.73, + "learning_rate": 4.8682869085697206e-05, + "loss": 0.0412, + "step": 8141, + "task_loss": 0.0870952308177948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999962085596122, + "compression_loss": 0.0, + "distillation_loss": 0.09176631271839142, + "epoch": 7.73, + "learning_rate": 4.8679454015475835e-05, + "loss": 0.0958, + "step": 8142, + "task_loss": 0.13180381059646606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999962486097061, + "compression_loss": 0.0, + "distillation_loss": 0.15721860527992249, + "epoch": 7.73, + "learning_rate": 4.867603464376759e-05, + "loss": 0.1509, + "step": 8143, + "task_loss": 0.09391912072896957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999962883767615, + "compression_loss": 0.0, + "distillation_loss": 0.020519878715276718, + "epoch": 7.73, + "learning_rate": 4.867261097119363e-05, + "loss": 0.0241, + "step": 8144, + "task_loss": 0.0560954324901104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999963278617823, + "compression_loss": 0.0, + "distillation_loss": 0.10751358419656754, + "epoch": 7.74, + "learning_rate": 4.8669182998375884e-05, + "loss": 0.1109, + "step": 8145, + "task_loss": 0.14114311337471008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999963670657722, + "compression_loss": 0.0, + "distillation_loss": 0.06851037591695786, + "epoch": 7.74, + "learning_rate": 4.8665750725937045e-05, + "loss": 0.0684, + "step": 8146, + "task_loss": 0.06784656643867493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999964059897348, + "compression_loss": 0.0, + "distillation_loss": 0.0376189686357975, + "epoch": 7.74, + "learning_rate": 4.866231415450062e-05, + "loss": 0.0377, + "step": 8147, + "task_loss": 0.03808595612645149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999964446346736, + "compression_loss": 0.0, + "distillation_loss": 0.03249119967222214, + "epoch": 7.74, + "learning_rate": 4.8658873284690866e-05, + "loss": 0.0449, + "step": 8148, + "task_loss": 0.15644009411334991 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999964830015925, + "compression_loss": 0.0, + "distillation_loss": 0.04549805074930191, + "epoch": 7.74, + "learning_rate": 4.865542811713284e-05, + "loss": 0.0521, + "step": 8149, + "task_loss": 0.11196024715900421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999965210914953, + "compression_loss": 0.0, + "distillation_loss": 0.07704181969165802, + "epoch": 7.74, + "learning_rate": 4.865197865245237e-05, + "loss": 0.0857, + "step": 8150, + "task_loss": 0.16380248963832855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999965589053855, + "compression_loss": 0.0, + "distillation_loss": 0.06383125483989716, + "epoch": 7.74, + "learning_rate": 4.8648524891276066e-05, + "loss": 0.0624, + "step": 8151, + "task_loss": 0.049697332084178925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999965964442667, + "compression_loss": 0.0, + "distillation_loss": 0.10283863544464111, + "epoch": 7.74, + "learning_rate": 4.8645066834231325e-05, + "loss": 0.1101, + "step": 8152, + "task_loss": 0.17590855062007904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999966337091426, + "compression_loss": 0.0, + "distillation_loss": 0.09706176817417145, + "epoch": 7.74, + "learning_rate": 4.8641604481946314e-05, + "loss": 0.1095, + "step": 8153, + "task_loss": 0.22108034789562225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999966707010172, + "compression_loss": 0.0, + "distillation_loss": 0.08932052552700043, + "epoch": 7.74, + "learning_rate": 4.863813783504999e-05, + "loss": 0.0871, + "step": 8154, + "task_loss": 0.0668034479022026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999967074208939, + "compression_loss": 0.0, + "distillation_loss": 0.022766336798667908, + "epoch": 7.74, + "learning_rate": 4.863466689417209e-05, + "loss": 0.0263, + "step": 8155, + "task_loss": 0.058080703020095825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999967438697764, + "compression_loss": 0.0, + "distillation_loss": 0.04971291124820709, + "epoch": 7.75, + "learning_rate": 4.863119165994312e-05, + "loss": 0.0537, + "step": 8156, + "task_loss": 0.08949106186628342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999967800486684, + "compression_loss": 0.0, + "distillation_loss": 0.04288540780544281, + "epoch": 7.75, + "learning_rate": 4.862771213299438e-05, + "loss": 0.039, + "step": 8157, + "task_loss": 0.004233557730913162 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999968159585735, + "compression_loss": 0.0, + "distillation_loss": 0.01929197832942009, + "epoch": 7.75, + "learning_rate": 4.8624228313957937e-05, + "loss": 0.0265, + "step": 8158, + "task_loss": 0.09141400456428528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999968516004956, + "compression_loss": 0.0, + "distillation_loss": 0.04137708246707916, + "epoch": 7.75, + "learning_rate": 4.862074020346664e-05, + "loss": 0.0378, + "step": 8159, + "task_loss": 0.0055605582892894745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999968869754382, + "compression_loss": 0.0, + "distillation_loss": 0.01853022351861, + "epoch": 7.75, + "learning_rate": 4.8617247802154134e-05, + "loss": 0.0252, + "step": 8160, + "task_loss": 0.085117407143116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999969220844052, + "compression_loss": 0.0, + "distillation_loss": 0.07992290705442429, + "epoch": 7.75, + "learning_rate": 4.861375111065482e-05, + "loss": 0.0841, + "step": 8161, + "task_loss": 0.12121039628982544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999969569284, + "compression_loss": 0.0, + "distillation_loss": 0.02700880542397499, + "epoch": 7.75, + "learning_rate": 4.861025012960389e-05, + "loss": 0.0271, + "step": 8162, + "task_loss": 0.027840938419103622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999969915084264, + "compression_loss": 0.0, + "distillation_loss": 0.04433220624923706, + "epoch": 7.75, + "learning_rate": 4.8606744859637316e-05, + "loss": 0.0487, + "step": 8163, + "task_loss": 0.08848340809345245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999970258254882, + "compression_loss": 0.0, + "distillation_loss": 0.022559959441423416, + "epoch": 7.75, + "learning_rate": 4.8603235301391844e-05, + "loss": 0.0306, + "step": 8164, + "task_loss": 0.10257212072610855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999970598805889, + "compression_loss": 0.0, + "distillation_loss": 0.020599160343408585, + "epoch": 7.75, + "learning_rate": 4.859972145550501e-05, + "loss": 0.0287, + "step": 8165, + "task_loss": 0.10190172493457794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999970936747323, + "compression_loss": 0.0, + "distillation_loss": 0.044832177460193634, + "epoch": 7.75, + "learning_rate": 4.859620332261512e-05, + "loss": 0.0469, + "step": 8166, + "task_loss": 0.0659225732088089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999971272089219, + "compression_loss": 0.0, + "distillation_loss": 0.0355226993560791, + "epoch": 7.76, + "learning_rate": 4.8592680903361247e-05, + "loss": 0.0403, + "step": 8167, + "task_loss": 0.08283548802137375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999971604841616, + "compression_loss": 0.0, + "distillation_loss": 0.06762556731700897, + "epoch": 7.76, + "learning_rate": 4.858915419838327e-05, + "loss": 0.0614, + "step": 8168, + "task_loss": 0.005646536126732826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799997193501455, + "compression_loss": 0.0, + "distillation_loss": 0.02109435945749283, + "epoch": 7.76, + "learning_rate": 4.8585623208321825e-05, + "loss": 0.0389, + "step": 8169, + "task_loss": 0.19891361892223358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999972262618058, + "compression_loss": 0.0, + "distillation_loss": 0.029250595718622208, + "epoch": 7.76, + "learning_rate": 4.858208793381833e-05, + "loss": 0.0603, + "step": 8170, + "task_loss": 0.3400185704231262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999972587662176, + "compression_loss": 0.0, + "distillation_loss": 0.10609747469425201, + "epoch": 7.76, + "learning_rate": 4.8578548375514995e-05, + "loss": 0.0997, + "step": 8171, + "task_loss": 0.042343318462371826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999972910156943, + "compression_loss": 0.0, + "distillation_loss": 0.03819608315825462, + "epoch": 7.76, + "learning_rate": 4.8575004534054794e-05, + "loss": 0.036, + "step": 8172, + "task_loss": 0.016501694917678833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999973230112394, + "compression_loss": 0.0, + "distillation_loss": 0.020714174956083298, + "epoch": 7.76, + "learning_rate": 4.8571456410081474e-05, + "loss": 0.0334, + "step": 8173, + "task_loss": 0.14768551290035248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999973547538565, + "compression_loss": 0.0, + "distillation_loss": 0.04434049874544144, + "epoch": 7.76, + "learning_rate": 4.856790400423958e-05, + "loss": 0.0539, + "step": 8174, + "task_loss": 0.13957616686820984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999973862445494, + "compression_loss": 0.0, + "distillation_loss": 0.024984436109662056, + "epoch": 7.76, + "learning_rate": 4.856434731717442e-05, + "loss": 0.0395, + "step": 8175, + "task_loss": 0.1703425943851471 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999974174843218, + "compression_loss": 0.0, + "distillation_loss": 0.013447124511003494, + "epoch": 7.76, + "learning_rate": 4.8560786349532075e-05, + "loss": 0.0169, + "step": 8176, + "task_loss": 0.04839882627129555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999974484741774, + "compression_loss": 0.0, + "distillation_loss": 0.04491272196173668, + "epoch": 7.77, + "learning_rate": 4.855722110195943e-05, + "loss": 0.0685, + "step": 8177, + "task_loss": 0.28108105063438416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999974792151199, + "compression_loss": 0.0, + "distillation_loss": 0.022170700132846832, + "epoch": 7.77, + "learning_rate": 4.8553651575104114e-05, + "loss": 0.033, + "step": 8178, + "task_loss": 0.130245178937912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999975097081529, + "compression_loss": 0.0, + "distillation_loss": 0.0931999534368515, + "epoch": 7.77, + "learning_rate": 4.8550077769614554e-05, + "loss": 0.0971, + "step": 8179, + "task_loss": 0.1317388266324997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79999753995428, + "compression_loss": 0.0, + "distillation_loss": 0.019387193024158478, + "epoch": 7.77, + "learning_rate": 4.8546499686139944e-05, + "loss": 0.0178, + "step": 8180, + "task_loss": 0.0039298199117183685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799997569954505, + "compression_loss": 0.0, + "distillation_loss": 0.014103731140494347, + "epoch": 7.77, + "learning_rate": 4.854291732533027e-05, + "loss": 0.0132, + "step": 8181, + "task_loss": 0.005378361791372299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999975997098316, + "compression_loss": 0.0, + "distillation_loss": 0.019791752099990845, + "epoch": 7.77, + "learning_rate": 4.853933068783628e-05, + "loss": 0.0279, + "step": 8182, + "task_loss": 0.10089607536792755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999976292212635, + "compression_loss": 0.0, + "distillation_loss": 0.08148065209388733, + "epoch": 7.77, + "learning_rate": 4.853573977430951e-05, + "loss": 0.0862, + "step": 8183, + "task_loss": 0.128190815448761 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999976584898043, + "compression_loss": 0.0, + "distillation_loss": 0.045781854540109634, + "epoch": 7.77, + "learning_rate": 4.8532144585402254e-05, + "loss": 0.0464, + "step": 8184, + "task_loss": 0.051745254546403885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999976875164577, + "compression_loss": 0.0, + "distillation_loss": 0.0476515106856823, + "epoch": 7.77, + "learning_rate": 4.85285451217676e-05, + "loss": 0.0505, + "step": 8185, + "task_loss": 0.07574954628944397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999977163022274, + "compression_loss": 0.0, + "distillation_loss": 0.15082959830760956, + "epoch": 7.77, + "learning_rate": 4.8524941384059415e-05, + "loss": 0.1472, + "step": 8186, + "task_loss": 0.11474103480577469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999977448481171, + "compression_loss": 0.0, + "distillation_loss": 0.016797035932540894, + "epoch": 7.77, + "learning_rate": 4.8521333372932326e-05, + "loss": 0.0269, + "step": 8187, + "task_loss": 0.11737027764320374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999977731551304, + "compression_loss": 0.0, + "distillation_loss": 0.035971708595752716, + "epoch": 7.78, + "learning_rate": 4.851772108904175e-05, + "loss": 0.0345, + "step": 8188, + "task_loss": 0.021439943462610245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999978012242711, + "compression_loss": 0.0, + "distillation_loss": 0.024865467101335526, + "epoch": 7.78, + "learning_rate": 4.851410453304388e-05, + "loss": 0.0321, + "step": 8189, + "task_loss": 0.09683802723884583 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999978290565428, + "compression_loss": 0.0, + "distillation_loss": 0.02018059231340885, + "epoch": 7.78, + "learning_rate": 4.851048370559567e-05, + "loss": 0.0277, + "step": 8190, + "task_loss": 0.09545273333787918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999978566529493, + "compression_loss": 0.0, + "distillation_loss": 0.032620202749967575, + "epoch": 7.78, + "learning_rate": 4.850685860735487e-05, + "loss": 0.0374, + "step": 8191, + "task_loss": 0.08022205531597137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999978840144941, + "compression_loss": 0.0, + "distillation_loss": 0.03163286671042442, + "epoch": 7.78, + "learning_rate": 4.850322923898e-05, + "loss": 0.0294, + "step": 8192, + "task_loss": 0.00926467590034008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799997911142181, + "compression_loss": 0.0, + "distillation_loss": 0.06667460501194, + "epoch": 7.78, + "learning_rate": 4.8499595601130337e-05, + "loss": 0.0806, + "step": 8193, + "task_loss": 0.20638611912727356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999979380370137, + "compression_loss": 0.0, + "distillation_loss": 0.1302226334810257, + "epoch": 7.78, + "learning_rate": 4.849595769446596e-05, + "loss": 0.1247, + "step": 8194, + "task_loss": 0.07523670792579651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999979646999957, + "compression_loss": 0.0, + "distillation_loss": 0.12275674939155579, + "epoch": 7.78, + "learning_rate": 4.849231551964771e-05, + "loss": 0.1306, + "step": 8195, + "task_loss": 0.20129883289337158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999979911321309, + "compression_loss": 0.0, + "distillation_loss": 0.03874664753675461, + "epoch": 7.78, + "learning_rate": 4.848866907733721e-05, + "loss": 0.0359, + "step": 8196, + "task_loss": 0.009907728061079979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999980173344229, + "compression_loss": 0.0, + "distillation_loss": 0.07732771337032318, + "epoch": 7.78, + "learning_rate": 4.848501836819684e-05, + "loss": 0.0893, + "step": 8197, + "task_loss": 0.19742365181446075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999980433078754, + "compression_loss": 0.0, + "distillation_loss": 0.024056117981672287, + "epoch": 7.79, + "learning_rate": 4.848136339288979e-05, + "loss": 0.0229, + "step": 8198, + "task_loss": 0.012888273224234581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799998069053492, + "compression_loss": 0.0, + "distillation_loss": 0.1355004906654358, + "epoch": 7.79, + "learning_rate": 4.8477704152079984e-05, + "loss": 0.1258, + "step": 8199, + "task_loss": 0.03880665823817253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999980945722766, + "compression_loss": 0.0, + "distillation_loss": 0.09072038531303406, + "epoch": 7.79, + "learning_rate": 4.8474040646432153e-05, + "loss": 0.0949, + "step": 8200, + "task_loss": 0.13292811810970306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999981198652326, + "compression_loss": 0.0, + "distillation_loss": 0.04228777810931206, + "epoch": 7.79, + "learning_rate": 4.8470372876611784e-05, + "loss": 0.0399, + "step": 8201, + "task_loss": 0.0188329815864563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999981449333639, + "compression_loss": 0.0, + "distillation_loss": 0.05842405557632446, + "epoch": 7.79, + "learning_rate": 4.846670084328515e-05, + "loss": 0.0558, + "step": 8202, + "task_loss": 0.03242020681500435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999981697776739, + "compression_loss": 0.0, + "distillation_loss": 0.10038132965564728, + "epoch": 7.79, + "learning_rate": 4.846302454711929e-05, + "loss": 0.0992, + "step": 8203, + "task_loss": 0.08900587260723114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999981943991666, + "compression_loss": 0.0, + "distillation_loss": 0.06294578313827515, + "epoch": 7.79, + "learning_rate": 4.845934398878202e-05, + "loss": 0.0766, + "step": 8204, + "task_loss": 0.19954392313957214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999982187988456, + "compression_loss": 0.0, + "distillation_loss": 0.015763722360134125, + "epoch": 7.79, + "learning_rate": 4.845565916894193e-05, + "loss": 0.0244, + "step": 8205, + "task_loss": 0.10191762447357178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999982429777145, + "compression_loss": 0.0, + "distillation_loss": 0.04905875027179718, + "epoch": 7.79, + "learning_rate": 4.8451970088268396e-05, + "loss": 0.0462, + "step": 8206, + "task_loss": 0.0201642494648695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799998266936777, + "compression_loss": 0.0, + "distillation_loss": 0.03759082779288292, + "epoch": 7.79, + "learning_rate": 4.8448276747431545e-05, + "loss": 0.0349, + "step": 8207, + "task_loss": 0.010456247255206108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999982906770369, + "compression_loss": 0.0, + "distillation_loss": 0.01988394558429718, + "epoch": 7.79, + "learning_rate": 4.84445791471023e-05, + "loss": 0.0286, + "step": 8208, + "task_loss": 0.10685549676418304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999983141994976, + "compression_loss": 0.0, + "distillation_loss": 0.1849537491798401, + "epoch": 7.8, + "learning_rate": 4.8440877287952336e-05, + "loss": 0.1814, + "step": 8209, + "task_loss": 0.14974652230739594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999983375051631, + "compression_loss": 0.0, + "distillation_loss": 0.03731781989336014, + "epoch": 7.8, + "learning_rate": 4.8437171170654125e-05, + "loss": 0.0406, + "step": 8210, + "task_loss": 0.07024012506008148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799998360595037, + "compression_loss": 0.0, + "distillation_loss": 0.027419112622737885, + "epoch": 7.8, + "learning_rate": 4.843346079588089e-05, + "loss": 0.0254, + "step": 8211, + "task_loss": 0.00690159946680069 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999983834701229, + "compression_loss": 0.0, + "distillation_loss": 0.03823021054267883, + "epoch": 7.8, + "learning_rate": 4.842974616430665e-05, + "loss": 0.0443, + "step": 8212, + "task_loss": 0.09891167283058167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999984061314245, + "compression_loss": 0.0, + "distillation_loss": 0.022461410611867905, + "epoch": 7.8, + "learning_rate": 4.842602727660618e-05, + "loss": 0.0244, + "step": 8213, + "task_loss": 0.04227849468588829 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999984285799454, + "compression_loss": 0.0, + "distillation_loss": 0.06684540212154388, + "epoch": 7.8, + "learning_rate": 4.842230413345503e-05, + "loss": 0.0692, + "step": 8214, + "task_loss": 0.09044404327869415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999984508166894, + "compression_loss": 0.0, + "distillation_loss": 0.041791971772909164, + "epoch": 7.8, + "learning_rate": 4.8418576735529535e-05, + "loss": 0.0441, + "step": 8215, + "task_loss": 0.06479668617248535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999984728426602, + "compression_loss": 0.0, + "distillation_loss": 0.03619157522916794, + "epoch": 7.8, + "learning_rate": 4.841484508350679e-05, + "loss": 0.0353, + "step": 8216, + "task_loss": 0.02702299691736698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999984946588615, + "compression_loss": 0.0, + "distillation_loss": 0.0437760166823864, + "epoch": 7.8, + "learning_rate": 4.841110917806467e-05, + "loss": 0.0542, + "step": 8217, + "task_loss": 0.14790481328964233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999985162662969, + "compression_loss": 0.0, + "distillation_loss": 0.03585965931415558, + "epoch": 7.8, + "learning_rate": 4.840736901988182e-05, + "loss": 0.0396, + "step": 8218, + "task_loss": 0.07364504784345627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999985376659701, + "compression_loss": 0.0, + "distillation_loss": 0.12400738894939423, + "epoch": 7.81, + "learning_rate": 4.840362460963765e-05, + "loss": 0.1329, + "step": 8219, + "task_loss": 0.21300512552261353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999985588588847, + "compression_loss": 0.0, + "distillation_loss": 0.04792033135890961, + "epoch": 7.81, + "learning_rate": 4.8399875948012355e-05, + "loss": 0.0518, + "step": 8220, + "task_loss": 0.0868147611618042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999985798460446, + "compression_loss": 0.0, + "distillation_loss": 0.04659024253487587, + "epoch": 7.81, + "learning_rate": 4.8396123035686906e-05, + "loss": 0.0427, + "step": 8221, + "task_loss": 0.007754124701023102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999986006284533, + "compression_loss": 0.0, + "distillation_loss": 0.19927890598773956, + "epoch": 7.81, + "learning_rate": 4.839236587334303e-05, + "loss": 0.2014, + "step": 8222, + "task_loss": 0.22048905491828918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999986212071145, + "compression_loss": 0.0, + "distillation_loss": 0.08156438916921616, + "epoch": 7.81, + "learning_rate": 4.8388604461663236e-05, + "loss": 0.0814, + "step": 8223, + "task_loss": 0.08026103675365448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799998641583032, + "compression_loss": 0.0, + "distillation_loss": 0.03177830949425697, + "epoch": 7.81, + "learning_rate": 4.838483880133079e-05, + "loss": 0.0392, + "step": 8224, + "task_loss": 0.10596133768558502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999986617572094, + "compression_loss": 0.0, + "distillation_loss": 0.23754796385765076, + "epoch": 7.81, + "learning_rate": 4.8381068893029766e-05, + "loss": 0.2324, + "step": 8225, + "task_loss": 0.18612830340862274 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999986817306503, + "compression_loss": 0.0, + "distillation_loss": 0.04634622856974602, + "epoch": 7.81, + "learning_rate": 4.837729473744497e-05, + "loss": 0.0424, + "step": 8226, + "task_loss": 0.0071801114827394485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999987015043585, + "compression_loss": 0.0, + "distillation_loss": 0.03712467849254608, + "epoch": 7.81, + "learning_rate": 4.8373516335261994e-05, + "loss": 0.034, + "step": 8227, + "task_loss": 0.0057260747998952866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999987210793376, + "compression_loss": 0.0, + "distillation_loss": 0.038928285241127014, + "epoch": 7.81, + "learning_rate": 4.8369733687167204e-05, + "loss": 0.0372, + "step": 8228, + "task_loss": 0.021699724718928337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999987404565914, + "compression_loss": 0.0, + "distillation_loss": 0.022505152970552444, + "epoch": 7.81, + "learning_rate": 4.836594679384775e-05, + "loss": 0.0259, + "step": 8229, + "task_loss": 0.056766681373119354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999987596371234, + "compression_loss": 0.0, + "distillation_loss": 0.10139244794845581, + "epoch": 7.82, + "learning_rate": 4.836215565599152e-05, + "loss": 0.096, + "step": 8230, + "task_loss": 0.0474693700671196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999987786219375, + "compression_loss": 0.0, + "distillation_loss": 0.14026379585266113, + "epoch": 7.82, + "learning_rate": 4.835836027428722e-05, + "loss": 0.1379, + "step": 8231, + "task_loss": 0.11672936379909515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999987974120373, + "compression_loss": 0.0, + "distillation_loss": 0.10862313210964203, + "epoch": 7.82, + "learning_rate": 4.8354560649424264e-05, + "loss": 0.1071, + "step": 8232, + "task_loss": 0.09331917762756348 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999988160084264, + "compression_loss": 0.0, + "distillation_loss": 0.01986592821776867, + "epoch": 7.82, + "learning_rate": 4.8350756782092894e-05, + "loss": 0.026, + "step": 8233, + "task_loss": 0.08163642883300781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999988344121085, + "compression_loss": 0.0, + "distillation_loss": 0.05118180066347122, + "epoch": 7.82, + "learning_rate": 4.8346948672984096e-05, + "loss": 0.0483, + "step": 8234, + "task_loss": 0.0220907311886549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999988526240873, + "compression_loss": 0.0, + "distillation_loss": 0.14736244082450867, + "epoch": 7.82, + "learning_rate": 4.8343136322789626e-05, + "loss": 0.1584, + "step": 8235, + "task_loss": 0.2577117383480072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999988706453666, + "compression_loss": 0.0, + "distillation_loss": 0.1561247706413269, + "epoch": 7.82, + "learning_rate": 4.8339319732202024e-05, + "loss": 0.1704, + "step": 8236, + "task_loss": 0.29920265078544617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79999888847695, + "compression_loss": 0.0, + "distillation_loss": 0.02194085717201233, + "epoch": 7.82, + "learning_rate": 4.83354989019146e-05, + "loss": 0.0284, + "step": 8237, + "task_loss": 0.0863027274608612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799998906119841, + "compression_loss": 0.0, + "distillation_loss": 0.012062850408256054, + "epoch": 7.82, + "learning_rate": 4.83316738326214e-05, + "loss": 0.0113, + "step": 8238, + "task_loss": 0.004339534789323807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999989235750437, + "compression_loss": 0.0, + "distillation_loss": 0.11506865918636322, + "epoch": 7.82, + "learning_rate": 4.832784452501729e-05, + "loss": 0.1089, + "step": 8239, + "task_loss": 0.05316751450300217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999989408435614, + "compression_loss": 0.0, + "distillation_loss": 0.02399417757987976, + "epoch": 7.83, + "learning_rate": 4.8324010979797875e-05, + "loss": 0.0228, + "step": 8240, + "task_loss": 0.012368382886052132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999989579263979, + "compression_loss": 0.0, + "distillation_loss": 0.08361876010894775, + "epoch": 7.83, + "learning_rate": 4.8320173197659534e-05, + "loss": 0.0955, + "step": 8241, + "task_loss": 0.20286312699317932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999989748245568, + "compression_loss": 0.0, + "distillation_loss": 0.03970951959490776, + "epoch": 7.83, + "learning_rate": 4.831633117929942e-05, + "loss": 0.0364, + "step": 8242, + "task_loss": 0.006229208782315254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799998991539042, + "compression_loss": 0.0, + "distillation_loss": 0.01506102830171585, + "epoch": 7.83, + "learning_rate": 4.831248492541545e-05, + "loss": 0.0141, + "step": 8243, + "task_loss": 0.005801372230052948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999008070857, + "compression_loss": 0.0, + "distillation_loss": 0.040881797671318054, + "epoch": 7.83, + "learning_rate": 4.830863443670632e-05, + "loss": 0.0392, + "step": 8244, + "task_loss": 0.023911267518997192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999990244210056, + "compression_loss": 0.0, + "distillation_loss": 0.01167452521622181, + "epoch": 7.83, + "learning_rate": 4.8304779713871495e-05, + "loss": 0.0109, + "step": 8245, + "task_loss": 0.0038937367498874664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999990405904914, + "compression_loss": 0.0, + "distillation_loss": 0.15877902507781982, + "epoch": 7.83, + "learning_rate": 4.83009207576112e-05, + "loss": 0.1789, + "step": 8246, + "task_loss": 0.3599551320075989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999990565803181, + "compression_loss": 0.0, + "distillation_loss": 0.03877423331141472, + "epoch": 7.83, + "learning_rate": 4.829705756862642e-05, + "loss": 0.0432, + "step": 8247, + "task_loss": 0.08290007710456848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999990723914894, + "compression_loss": 0.0, + "distillation_loss": 0.12737919390201569, + "epoch": 7.83, + "learning_rate": 4.829319014761894e-05, + "loss": 0.1222, + "step": 8248, + "task_loss": 0.07570772618055344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999088025009, + "compression_loss": 0.0, + "distillation_loss": 0.07020660489797592, + "epoch": 7.83, + "learning_rate": 4.828931849529129e-05, + "loss": 0.0701, + "step": 8249, + "task_loss": 0.06908702105283737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991034818805, + "compression_loss": 0.0, + "distillation_loss": 0.10183189064264297, + "epoch": 7.83, + "learning_rate": 4.8285442612346774e-05, + "loss": 0.0971, + "step": 8250, + "task_loss": 0.05413543060421944 + }, + { + "epoch": 7.83, + "eval_accuracy": 0.8899082568807339, + "eval_loss": 0.4171345829963684, + "eval_runtime": 18.0817, + "eval_samples_per_second": 48.225, + "eval_steps_per_second": 6.028, + "step": 8250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991187631076, + "compression_loss": 0.0, + "distillation_loss": 0.07504773885011673, + "epoch": 7.84, + "learning_rate": 4.828156249948946e-05, + "loss": 0.0727, + "step": 8251, + "task_loss": 0.051514700055122375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991338696941, + "compression_loss": 0.0, + "distillation_loss": 0.022689666599035263, + "epoch": 7.84, + "learning_rate": 4.827767815742419e-05, + "loss": 0.031, + "step": 8252, + "task_loss": 0.10627881437540054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991488026436, + "compression_loss": 0.0, + "distillation_loss": 0.045913174748420715, + "epoch": 7.84, + "learning_rate": 4.8273789586856574e-05, + "loss": 0.0693, + "step": 8253, + "task_loss": 0.27937787771224976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991635629597, + "compression_loss": 0.0, + "distillation_loss": 0.08047342300415039, + "epoch": 7.84, + "learning_rate": 4.8269896788493e-05, + "loss": 0.0857, + "step": 8254, + "task_loss": 0.1325674057006836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991781516462, + "compression_loss": 0.0, + "distillation_loss": 0.02534073404967785, + "epoch": 7.84, + "learning_rate": 4.8265999763040603e-05, + "loss": 0.0333, + "step": 8255, + "task_loss": 0.10540292412042618 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999991925697068, + "compression_loss": 0.0, + "distillation_loss": 0.08169533312320709, + "epoch": 7.84, + "learning_rate": 4.8262098511207295e-05, + "loss": 0.0787, + "step": 8256, + "task_loss": 0.0512889064848423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999992068181451, + "compression_loss": 0.0, + "distillation_loss": 0.14142774045467377, + "epoch": 7.84, + "learning_rate": 4.825819303370177e-05, + "loss": 0.1345, + "step": 8257, + "task_loss": 0.07237007468938828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999992208979647, + "compression_loss": 0.0, + "distillation_loss": 0.026454931125044823, + "epoch": 7.84, + "learning_rate": 4.8254283331233464e-05, + "loss": 0.0294, + "step": 8258, + "task_loss": 0.05604104697704315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999992348101694, + "compression_loss": 0.0, + "distillation_loss": 0.022744864225387573, + "epoch": 7.84, + "learning_rate": 4.825036940451259e-05, + "loss": 0.0312, + "step": 8259, + "task_loss": 0.10733005404472351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999248555763, + "compression_loss": 0.0, + "distillation_loss": 0.19848661124706268, + "epoch": 7.84, + "learning_rate": 4.8246451254250145e-05, + "loss": 0.1952, + "step": 8260, + "task_loss": 0.1654394567012787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999262135749, + "compression_loss": 0.0, + "distillation_loss": 0.038717396557331085, + "epoch": 7.85, + "learning_rate": 4.8242528881157866e-05, + "loss": 0.0535, + "step": 8261, + "task_loss": 0.18647165596485138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999992755511312, + "compression_loss": 0.0, + "distillation_loss": 0.07267004996538162, + "epoch": 7.85, + "learning_rate": 4.823860228594829e-05, + "loss": 0.0775, + "step": 8262, + "task_loss": 0.1208515465259552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999992888029132, + "compression_loss": 0.0, + "distillation_loss": 0.07859297096729279, + "epoch": 7.85, + "learning_rate": 4.823467146933468e-05, + "loss": 0.0764, + "step": 8263, + "task_loss": 0.05707138776779175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999993018920986, + "compression_loss": 0.0, + "distillation_loss": 0.015503250993788242, + "epoch": 7.85, + "learning_rate": 4.823073643203111e-05, + "loss": 0.0248, + "step": 8264, + "task_loss": 0.10833414644002914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999993148196912, + "compression_loss": 0.0, + "distillation_loss": 0.17332197725772858, + "epoch": 7.85, + "learning_rate": 4.822679717475237e-05, + "loss": 0.1677, + "step": 8265, + "task_loss": 0.11748655885457993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999993275866947, + "compression_loss": 0.0, + "distillation_loss": 0.031857073307037354, + "epoch": 7.85, + "learning_rate": 4.8222853698214076e-05, + "loss": 0.0292, + "step": 8266, + "task_loss": 0.005062129348516464 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999993401941128, + "compression_loss": 0.0, + "distillation_loss": 0.02804490551352501, + "epoch": 7.85, + "learning_rate": 4.8218906003132555e-05, + "loss": 0.0305, + "step": 8267, + "task_loss": 0.05293525010347366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999993526429491, + "compression_loss": 0.0, + "distillation_loss": 0.024678591638803482, + "epoch": 7.85, + "learning_rate": 4.8214954090224946e-05, + "loss": 0.0295, + "step": 8268, + "task_loss": 0.07294875383377075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999993649342072, + "compression_loss": 0.0, + "distillation_loss": 0.08018738776445389, + "epoch": 7.85, + "learning_rate": 4.8210997960209114e-05, + "loss": 0.0856, + "step": 8269, + "task_loss": 0.13434135913848877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999377068891, + "compression_loss": 0.0, + "distillation_loss": 0.021488351747393608, + "epoch": 7.85, + "learning_rate": 4.8207037613803715e-05, + "loss": 0.0198, + "step": 8270, + "task_loss": 0.00473182275891304 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999389048004, + "compression_loss": 0.0, + "distillation_loss": 0.1669892966747284, + "epoch": 7.85, + "learning_rate": 4.820307305172818e-05, + "loss": 0.1599, + "step": 8271, + "task_loss": 0.09594349563121796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994008725501, + "compression_loss": 0.0, + "distillation_loss": 0.039625514298677444, + "epoch": 7.86, + "learning_rate": 4.8199104274702666e-05, + "loss": 0.054, + "step": 8272, + "task_loss": 0.18374371528625488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994125435328, + "compression_loss": 0.0, + "distillation_loss": 0.01832488365471363, + "epoch": 7.86, + "learning_rate": 4.819513128344814e-05, + "loss": 0.0221, + "step": 8273, + "task_loss": 0.055578745901584625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994240619557, + "compression_loss": 0.0, + "distillation_loss": 0.1487281322479248, + "epoch": 7.86, + "learning_rate": 4.8191154078686306e-05, + "loss": 0.1573, + "step": 8274, + "task_loss": 0.23400062322616577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994354288227, + "compression_loss": 0.0, + "distillation_loss": 0.101641446352005, + "epoch": 7.86, + "learning_rate": 4.8187172661139636e-05, + "loss": 0.1103, + "step": 8275, + "task_loss": 0.18786770105361938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994466451373, + "compression_loss": 0.0, + "distillation_loss": 0.019857624545693398, + "epoch": 7.86, + "learning_rate": 4.818318703153139e-05, + "loss": 0.0187, + "step": 8276, + "task_loss": 0.00832618772983551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994577119034, + "compression_loss": 0.0, + "distillation_loss": 0.034556448459625244, + "epoch": 7.86, + "learning_rate": 4.817919719058557e-05, + "loss": 0.0398, + "step": 8277, + "task_loss": 0.08679534494876862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994686301245, + "compression_loss": 0.0, + "distillation_loss": 0.08879555761814117, + "epoch": 7.86, + "learning_rate": 4.8175203139026934e-05, + "loss": 0.0846, + "step": 8278, + "task_loss": 0.046484023332595825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994794008043, + "compression_loss": 0.0, + "distillation_loss": 0.035275399684906006, + "epoch": 7.86, + "learning_rate": 4.817120487758104e-05, + "loss": 0.0353, + "step": 8279, + "task_loss": 0.03573717176914215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999994900249465, + "compression_loss": 0.0, + "distillation_loss": 0.05630365014076233, + "epoch": 7.86, + "learning_rate": 4.81672024069742e-05, + "loss": 0.0763, + "step": 8280, + "task_loss": 0.2562328577041626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999500503555, + "compression_loss": 0.0, + "distillation_loss": 0.03214915469288826, + "epoch": 7.86, + "learning_rate": 4.816319572793345e-05, + "loss": 0.0307, + "step": 8281, + "task_loss": 0.01762857846915722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999510837633, + "compression_loss": 0.0, + "distillation_loss": 0.12708397209644318, + "epoch": 7.87, + "learning_rate": 4.815918484118665e-05, + "loss": 0.1273, + "step": 8282, + "task_loss": 0.1288326233625412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995210281846, + "compression_loss": 0.0, + "distillation_loss": 0.03236864507198334, + "epoch": 7.87, + "learning_rate": 4.815516974746239e-05, + "loss": 0.0354, + "step": 8283, + "task_loss": 0.06306421756744385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995310762132, + "compression_loss": 0.0, + "distillation_loss": 0.20854994654655457, + "epoch": 7.87, + "learning_rate": 4.815115044749003e-05, + "loss": 0.2032, + "step": 8284, + "task_loss": 0.15457791090011597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995409827229, + "compression_loss": 0.0, + "distillation_loss": 0.10258646309375763, + "epoch": 7.87, + "learning_rate": 4.814712694199969e-05, + "loss": 0.1004, + "step": 8285, + "task_loss": 0.08033056557178497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995507487169, + "compression_loss": 0.0, + "distillation_loss": 0.009589407593011856, + "epoch": 7.87, + "learning_rate": 4.814309923172227e-05, + "loss": 0.0089, + "step": 8286, + "task_loss": 0.003172697499394417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995603751991, + "compression_loss": 0.0, + "distillation_loss": 0.07792666554450989, + "epoch": 7.87, + "learning_rate": 4.81390673173894e-05, + "loss": 0.0795, + "step": 8287, + "task_loss": 0.09386920928955078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995698631732, + "compression_loss": 0.0, + "distillation_loss": 0.19815564155578613, + "epoch": 7.87, + "learning_rate": 4.8135031199733524e-05, + "loss": 0.2015, + "step": 8288, + "task_loss": 0.2318621575832367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995792136428, + "compression_loss": 0.0, + "distillation_loss": 0.06799386441707611, + "epoch": 7.87, + "learning_rate": 4.813099087948781e-05, + "loss": 0.0688, + "step": 8289, + "task_loss": 0.07573655992746353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995884276118, + "compression_loss": 0.0, + "distillation_loss": 0.021318640559911728, + "epoch": 7.87, + "learning_rate": 4.812694635738621e-05, + "loss": 0.0198, + "step": 8290, + "task_loss": 0.0060688890516757965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999995975060836, + "compression_loss": 0.0, + "distillation_loss": 0.06302770227193832, + "epoch": 7.87, + "learning_rate": 4.812289763416341e-05, + "loss": 0.0652, + "step": 8291, + "task_loss": 0.08448217064142227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999606450062, + "compression_loss": 0.0, + "distillation_loss": 0.07862488925457001, + "epoch": 7.87, + "learning_rate": 4.81188447105549e-05, + "loss": 0.0874, + "step": 8292, + "task_loss": 0.16682052612304688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996152605506, + "compression_loss": 0.0, + "distillation_loss": 0.024814866483211517, + "epoch": 7.88, + "learning_rate": 4.811478758729691e-05, + "loss": 0.0228, + "step": 8293, + "task_loss": 0.004621252417564392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996239385534, + "compression_loss": 0.0, + "distillation_loss": 0.04633516073226929, + "epoch": 7.88, + "learning_rate": 4.811072626512642e-05, + "loss": 0.0477, + "step": 8294, + "task_loss": 0.06027062237262726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996324850737, + "compression_loss": 0.0, + "distillation_loss": 0.12473642826080322, + "epoch": 7.88, + "learning_rate": 4.810666074478121e-05, + "loss": 0.123, + "step": 8295, + "task_loss": 0.1078307181596756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996409011153, + "compression_loss": 0.0, + "distillation_loss": 0.0652468129992485, + "epoch": 7.88, + "learning_rate": 4.8102591026999796e-05, + "loss": 0.0725, + "step": 8296, + "task_loss": 0.1382291316986084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999649187682, + "compression_loss": 0.0, + "distillation_loss": 0.011844221502542496, + "epoch": 7.88, + "learning_rate": 4.8098517112521456e-05, + "loss": 0.025, + "step": 8297, + "task_loss": 0.1432102769613266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996573457773, + "compression_loss": 0.0, + "distillation_loss": 0.11007647216320038, + "epoch": 7.88, + "learning_rate": 4.8094439002086234e-05, + "loss": 0.1154, + "step": 8298, + "task_loss": 0.16329604387283325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999665376405, + "compression_loss": 0.0, + "distillation_loss": 0.03129512071609497, + "epoch": 7.88, + "learning_rate": 4.809035669643495e-05, + "loss": 0.0287, + "step": 8299, + "task_loss": 0.004948470741510391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996732805688, + "compression_loss": 0.0, + "distillation_loss": 0.03155820071697235, + "epoch": 7.88, + "learning_rate": 4.808627019630917e-05, + "loss": 0.0389, + "step": 8300, + "task_loss": 0.10458790510892868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996810592722, + "compression_loss": 0.0, + "distillation_loss": 0.018336599692702293, + "epoch": 7.88, + "learning_rate": 4.808217950245122e-05, + "loss": 0.0173, + "step": 8301, + "task_loss": 0.008344225585460663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996887135191, + "compression_loss": 0.0, + "distillation_loss": 0.1453208029270172, + "epoch": 7.88, + "learning_rate": 4.807808461560419e-05, + "loss": 0.153, + "step": 8302, + "task_loss": 0.22258791327476501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999996962443132, + "compression_loss": 0.0, + "distillation_loss": 0.026547754183411598, + "epoch": 7.89, + "learning_rate": 4.8073985536511956e-05, + "loss": 0.0299, + "step": 8303, + "task_loss": 0.059667013585567474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997036526579, + "compression_loss": 0.0, + "distillation_loss": 0.054472848773002625, + "epoch": 7.89, + "learning_rate": 4.806988226591912e-05, + "loss": 0.0525, + "step": 8304, + "task_loss": 0.03490680456161499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997109395572, + "compression_loss": 0.0, + "distillation_loss": 0.030267203226685524, + "epoch": 7.89, + "learning_rate": 4.806577480457106e-05, + "loss": 0.0307, + "step": 8305, + "task_loss": 0.034910961985588074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997181060147, + "compression_loss": 0.0, + "distillation_loss": 0.08685198426246643, + "epoch": 7.89, + "learning_rate": 4.8061663153213935e-05, + "loss": 0.0862, + "step": 8306, + "task_loss": 0.07995598018169403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999725153034, + "compression_loss": 0.0, + "distillation_loss": 0.05820311978459358, + "epoch": 7.89, + "learning_rate": 4.805754731259462e-05, + "loss": 0.0552, + "step": 8307, + "task_loss": 0.02777511440217495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997320816187, + "compression_loss": 0.0, + "distillation_loss": 0.0553501695394516, + "epoch": 7.89, + "learning_rate": 4.805342728346079e-05, + "loss": 0.0533, + "step": 8308, + "task_loss": 0.034611959010362625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997388927726, + "compression_loss": 0.0, + "distillation_loss": 0.06473010778427124, + "epoch": 7.89, + "learning_rate": 4.804930306656087e-05, + "loss": 0.0656, + "step": 8309, + "task_loss": 0.07328800857067108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997455874995, + "compression_loss": 0.0, + "distillation_loss": 0.052684955298900604, + "epoch": 7.89, + "learning_rate": 4.804517466264405e-05, + "loss": 0.055, + "step": 8310, + "task_loss": 0.07596210390329361 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997521668029, + "compression_loss": 0.0, + "distillation_loss": 0.1126926839351654, + "epoch": 7.89, + "learning_rate": 4.8041042072460244e-05, + "loss": 0.1187, + "step": 8311, + "task_loss": 0.17313697934150696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997586316866, + "compression_loss": 0.0, + "distillation_loss": 0.2039157748222351, + "epoch": 7.89, + "learning_rate": 4.803690529676019e-05, + "loss": 0.2047, + "step": 8312, + "task_loss": 0.211942657828331 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997649831541, + "compression_loss": 0.0, + "distillation_loss": 0.04965321347117424, + "epoch": 7.89, + "learning_rate": 4.803276433629534e-05, + "loss": 0.0641, + "step": 8313, + "task_loss": 0.19365498423576355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997712222093, + "compression_loss": 0.0, + "distillation_loss": 0.028864435851573944, + "epoch": 7.9, + "learning_rate": 4.802861919181793e-05, + "loss": 0.0268, + "step": 8314, + "task_loss": 0.007797591388225555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997773498557, + "compression_loss": 0.0, + "distillation_loss": 0.06014259159564972, + "epoch": 7.9, + "learning_rate": 4.802446986408093e-05, + "loss": 0.0624, + "step": 8315, + "task_loss": 0.0831136405467987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997833670972, + "compression_loss": 0.0, + "distillation_loss": 0.03688354790210724, + "epoch": 7.9, + "learning_rate": 4.8020316353838095e-05, + "loss": 0.0529, + "step": 8316, + "task_loss": 0.19723045825958252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997892749372, + "compression_loss": 0.0, + "distillation_loss": 0.029626084491610527, + "epoch": 7.9, + "learning_rate": 4.8016158661843926e-05, + "loss": 0.0273, + "step": 8317, + "task_loss": 0.006546778604388237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999997950743797, + "compression_loss": 0.0, + "distillation_loss": 0.02643163688480854, + "epoch": 7.9, + "learning_rate": 4.8011996788853686e-05, + "loss": 0.0339, + "step": 8318, + "task_loss": 0.10074707865715027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998007664281, + "compression_loss": 0.0, + "distillation_loss": 0.021654170006513596, + "epoch": 7.9, + "learning_rate": 4.80078307356234e-05, + "loss": 0.0306, + "step": 8319, + "task_loss": 0.1115964949131012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998063520862, + "compression_loss": 0.0, + "distillation_loss": 0.020197220146656036, + "epoch": 7.9, + "learning_rate": 4.800366050290986e-05, + "loss": 0.0187, + "step": 8320, + "task_loss": 0.005556017160415649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998118323577, + "compression_loss": 0.0, + "distillation_loss": 0.015532903373241425, + "epoch": 7.9, + "learning_rate": 4.799948609147061e-05, + "loss": 0.0234, + "step": 8321, + "task_loss": 0.09371798485517502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998172082463, + "compression_loss": 0.0, + "distillation_loss": 0.041199732571840286, + "epoch": 7.9, + "learning_rate": 4.7995307502063936e-05, + "loss": 0.0383, + "step": 8322, + "task_loss": 0.011793764308094978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998224807556, + "compression_loss": 0.0, + "distillation_loss": 0.11037556827068329, + "epoch": 7.9, + "learning_rate": 4.799112473544891e-05, + "loss": 0.1096, + "step": 8323, + "task_loss": 0.1021459624171257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998276508894, + "compression_loss": 0.0, + "distillation_loss": 0.05237120762467384, + "epoch": 7.91, + "learning_rate": 4.7986937792385344e-05, + "loss": 0.0535, + "step": 8324, + "task_loss": 0.0640583410859108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998327196512, + "compression_loss": 0.0, + "distillation_loss": 0.0475940927863121, + "epoch": 7.91, + "learning_rate": 4.798274667363383e-05, + "loss": 0.0521, + "step": 8325, + "task_loss": 0.09266771376132965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998376880448, + "compression_loss": 0.0, + "distillation_loss": 0.022809145972132683, + "epoch": 7.91, + "learning_rate": 4.7978551379955684e-05, + "loss": 0.0212, + "step": 8326, + "task_loss": 0.006463898345828056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999842557074, + "compression_loss": 0.0, + "distillation_loss": 0.11664307862520218, + "epoch": 7.91, + "learning_rate": 4.797435191211302e-05, + "loss": 0.1186, + "step": 8327, + "task_loss": 0.13656139373779297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998473277422, + "compression_loss": 0.0, + "distillation_loss": 0.047932952642440796, + "epoch": 7.91, + "learning_rate": 4.797014827086869e-05, + "loss": 0.0526, + "step": 8328, + "task_loss": 0.09455284476280212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998520010533, + "compression_loss": 0.0, + "distillation_loss": 0.0719044953584671, + "epoch": 7.91, + "learning_rate": 4.79659404569863e-05, + "loss": 0.0775, + "step": 8329, + "task_loss": 0.1278771162033081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999856578011, + "compression_loss": 0.0, + "distillation_loss": 0.010797183960676193, + "epoch": 7.91, + "learning_rate": 4.7961728471230214e-05, + "loss": 0.0099, + "step": 8330, + "task_loss": 0.0018469560891389847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998610596187, + "compression_loss": 0.0, + "distillation_loss": 0.0197126641869545, + "epoch": 7.91, + "learning_rate": 4.7957512314365574e-05, + "loss": 0.0256, + "step": 8331, + "task_loss": 0.07906901836395264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998654468805, + "compression_loss": 0.0, + "distillation_loss": 0.02190200798213482, + "epoch": 7.91, + "learning_rate": 4.7953291987158254e-05, + "loss": 0.0201, + "step": 8332, + "task_loss": 0.003983369097113609 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998697407997, + "compression_loss": 0.0, + "distillation_loss": 0.15597021579742432, + "epoch": 7.91, + "learning_rate": 4.79490674903749e-05, + "loss": 0.1605, + "step": 8333, + "task_loss": 0.20110704004764557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998739423803, + "compression_loss": 0.0, + "distillation_loss": 0.020552337169647217, + "epoch": 7.91, + "learning_rate": 4.7944838824782916e-05, + "loss": 0.0292, + "step": 8334, + "task_loss": 0.10749036073684692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998780526257, + "compression_loss": 0.0, + "distillation_loss": 0.02331959456205368, + "epoch": 7.92, + "learning_rate": 4.794060599115045e-05, + "loss": 0.0217, + "step": 8335, + "task_loss": 0.0067675188183784485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998820725398, + "compression_loss": 0.0, + "distillation_loss": 0.012297404929995537, + "epoch": 7.92, + "learning_rate": 4.793636899024643e-05, + "loss": 0.019, + "step": 8336, + "task_loss": 0.07974053174257278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998860031262, + "compression_loss": 0.0, + "distillation_loss": 0.17458131909370422, + "epoch": 7.92, + "learning_rate": 4.7932127822840516e-05, + "loss": 0.1665, + "step": 8337, + "task_loss": 0.09329235553741455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998898453885, + "compression_loss": 0.0, + "distillation_loss": 0.016357656568288803, + "epoch": 7.92, + "learning_rate": 4.792788248970314e-05, + "loss": 0.0217, + "step": 8338, + "task_loss": 0.06964881718158722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998936003305, + "compression_loss": 0.0, + "distillation_loss": 0.10674792528152466, + "epoch": 7.92, + "learning_rate": 4.79236329916055e-05, + "loss": 0.1097, + "step": 8339, + "task_loss": 0.13648146390914917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999998972689558, + "compression_loss": 0.0, + "distillation_loss": 0.014713255688548088, + "epoch": 7.92, + "learning_rate": 4.79193793293195e-05, + "loss": 0.0227, + "step": 8340, + "task_loss": 0.09422945976257324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999008522681, + "compression_loss": 0.0, + "distillation_loss": 0.014077425003051758, + "epoch": 7.92, + "learning_rate": 4.791512150361788e-05, + "loss": 0.0292, + "step": 8341, + "task_loss": 0.1649925708770752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999043512712, + "compression_loss": 0.0, + "distillation_loss": 0.08245790749788284, + "epoch": 7.92, + "learning_rate": 4.791085951527408e-05, + "loss": 0.0749, + "step": 8342, + "task_loss": 0.006448717787861824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999077669687, + "compression_loss": 0.0, + "distillation_loss": 0.08674632757902145, + "epoch": 7.92, + "learning_rate": 4.7906593365062304e-05, + "loss": 0.0895, + "step": 8343, + "task_loss": 0.11430380493402481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999111003642, + "compression_loss": 0.0, + "distillation_loss": 0.06291348487138748, + "epoch": 7.92, + "learning_rate": 4.790232305375752e-05, + "loss": 0.061, + "step": 8344, + "task_loss": 0.04396482929587364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999143524614, + "compression_loss": 0.0, + "distillation_loss": 0.07400640845298767, + "epoch": 7.92, + "learning_rate": 4.789804858213547e-05, + "loss": 0.0753, + "step": 8345, + "task_loss": 0.08731956779956818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999175242641, + "compression_loss": 0.0, + "distillation_loss": 0.014547398313879967, + "epoch": 7.93, + "learning_rate": 4.7893769950972605e-05, + "loss": 0.0196, + "step": 8346, + "task_loss": 0.06489875912666321 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999206167759, + "compression_loss": 0.0, + "distillation_loss": 0.02471366710960865, + "epoch": 7.93, + "learning_rate": 4.788948716104618e-05, + "loss": 0.0266, + "step": 8347, + "task_loss": 0.043872442096471786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999236310006, + "compression_loss": 0.0, + "distillation_loss": 0.032343026250600815, + "epoch": 7.93, + "learning_rate": 4.7885200213134164e-05, + "loss": 0.0309, + "step": 8348, + "task_loss": 0.01805000938475132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999265679416, + "compression_loss": 0.0, + "distillation_loss": 0.16060638427734375, + "epoch": 7.93, + "learning_rate": 4.788090910801532e-05, + "loss": 0.1635, + "step": 8349, + "task_loss": 0.18905338644981384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999294286029, + "compression_loss": 0.0, + "distillation_loss": 0.021082771942019463, + "epoch": 7.93, + "learning_rate": 4.787661384646913e-05, + "loss": 0.0198, + "step": 8350, + "task_loss": 0.008079813793301582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999932213988, + "compression_loss": 0.0, + "distillation_loss": 0.09513162821531296, + "epoch": 7.93, + "learning_rate": 4.787231442927587e-05, + "loss": 0.1033, + "step": 8351, + "task_loss": 0.17651014029979706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999349251006, + "compression_loss": 0.0, + "distillation_loss": 0.026541750878095627, + "epoch": 7.93, + "learning_rate": 4.786801085721654e-05, + "loss": 0.0317, + "step": 8352, + "task_loss": 0.07830867916345596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999375629444, + "compression_loss": 0.0, + "distillation_loss": 0.052206460386514664, + "epoch": 7.93, + "learning_rate": 4.78637031310729e-05, + "loss": 0.0684, + "step": 8353, + "task_loss": 0.21389958262443542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999401285232, + "compression_loss": 0.0, + "distillation_loss": 0.020280838012695312, + "epoch": 7.93, + "learning_rate": 4.7859391251627474e-05, + "loss": 0.0348, + "step": 8354, + "task_loss": 0.16505402326583862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999426228404, + "compression_loss": 0.0, + "distillation_loss": 0.04192467778921127, + "epoch": 7.93, + "learning_rate": 4.7855075219663535e-05, + "loss": 0.0557, + "step": 8355, + "task_loss": 0.17976054549217224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999450469, + "compression_loss": 0.0, + "distillation_loss": 0.029960088431835175, + "epoch": 7.94, + "learning_rate": 4.785075503596511e-05, + "loss": 0.0296, + "step": 8356, + "task_loss": 0.02586023323237896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999474017054, + "compression_loss": 0.0, + "distillation_loss": 0.041166506707668304, + "epoch": 7.94, + "learning_rate": 4.7846430701316994e-05, + "loss": 0.0383, + "step": 8357, + "task_loss": 0.012875651940703392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999496882605, + "compression_loss": 0.0, + "distillation_loss": 0.07181466370820999, + "epoch": 7.94, + "learning_rate": 4.78421022165047e-05, + "loss": 0.0763, + "step": 8358, + "task_loss": 0.11669700592756271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999519075689, + "compression_loss": 0.0, + "distillation_loss": 0.1549142599105835, + "epoch": 7.94, + "learning_rate": 4.783776958231453e-05, + "loss": 0.1549, + "step": 8359, + "task_loss": 0.1552504003047943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999540606343, + "compression_loss": 0.0, + "distillation_loss": 0.0709429532289505, + "epoch": 7.94, + "learning_rate": 4.783343279953353e-05, + "loss": 0.0802, + "step": 8360, + "task_loss": 0.16332530975341797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999561484603, + "compression_loss": 0.0, + "distillation_loss": 0.05141635984182358, + "epoch": 7.94, + "learning_rate": 4.782909186894949e-05, + "loss": 0.048, + "step": 8361, + "task_loss": 0.017139893025159836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999581720506, + "compression_loss": 0.0, + "distillation_loss": 0.08039192855358124, + "epoch": 7.94, + "learning_rate": 4.782474679135097e-05, + "loss": 0.0835, + "step": 8362, + "task_loss": 0.11189291626214981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999960132409, + "compression_loss": 0.0, + "distillation_loss": 0.03790656104683876, + "epoch": 7.94, + "learning_rate": 4.782039756752727e-05, + "loss": 0.0349, + "step": 8363, + "task_loss": 0.008079200983047485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999620305391, + "compression_loss": 0.0, + "distillation_loss": 0.03653489425778389, + "epoch": 7.94, + "learning_rate": 4.781604419826845e-05, + "loss": 0.043, + "step": 8364, + "task_loss": 0.10079368948936462 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999638674447, + "compression_loss": 0.0, + "distillation_loss": 0.06203051656484604, + "epoch": 7.94, + "learning_rate": 4.781168668436532e-05, + "loss": 0.0605, + "step": 8365, + "task_loss": 0.04713946580886841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999656441292, + "compression_loss": 0.0, + "distillation_loss": 0.1302792727947235, + "epoch": 7.94, + "learning_rate": 4.780732502660943e-05, + "loss": 0.1366, + "step": 8366, + "task_loss": 0.1930633783340454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999673615966, + "compression_loss": 0.0, + "distillation_loss": 0.1713554859161377, + "epoch": 7.95, + "learning_rate": 4.780295922579312e-05, + "loss": 0.1854, + "step": 8367, + "task_loss": 0.3117556571960449 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999690208504, + "compression_loss": 0.0, + "distillation_loss": 0.08704891055822372, + "epoch": 7.95, + "learning_rate": 4.779858928270944e-05, + "loss": 0.0917, + "step": 8368, + "task_loss": 0.13358929753303528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999706228943, + "compression_loss": 0.0, + "distillation_loss": 0.12303698807954788, + "epoch": 7.95, + "learning_rate": 4.7794215198152216e-05, + "loss": 0.1175, + "step": 8369, + "task_loss": 0.06791896373033524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999972168732, + "compression_loss": 0.0, + "distillation_loss": 0.09498666226863861, + "epoch": 7.95, + "learning_rate": 4.778983697291603e-05, + "loss": 0.091, + "step": 8370, + "task_loss": 0.05467312037944794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999736593671, + "compression_loss": 0.0, + "distillation_loss": 0.0775984525680542, + "epoch": 7.95, + "learning_rate": 4.7785454607796195e-05, + "loss": 0.0746, + "step": 8371, + "task_loss": 0.04722478240728378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999750958036, + "compression_loss": 0.0, + "distillation_loss": 0.09941750019788742, + "epoch": 7.95, + "learning_rate": 4.77810681035888e-05, + "loss": 0.0948, + "step": 8372, + "task_loss": 0.053179264068603516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999764790447, + "compression_loss": 0.0, + "distillation_loss": 0.08102773874998093, + "epoch": 7.95, + "learning_rate": 4.777667746109067e-05, + "loss": 0.0827, + "step": 8373, + "task_loss": 0.09824100136756897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999778100945, + "compression_loss": 0.0, + "distillation_loss": 0.06384574621915817, + "epoch": 7.95, + "learning_rate": 4.7772282681099377e-05, + "loss": 0.0756, + "step": 8374, + "task_loss": 0.18099090456962585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999790899565, + "compression_loss": 0.0, + "distillation_loss": 0.024305138736963272, + "epoch": 7.95, + "learning_rate": 4.7767883764413266e-05, + "loss": 0.0307, + "step": 8375, + "task_loss": 0.08778215944766998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999803196343, + "compression_loss": 0.0, + "distillation_loss": 0.029779810458421707, + "epoch": 7.95, + "learning_rate": 4.776348071183142e-05, + "loss": 0.0478, + "step": 8376, + "task_loss": 0.20959466695785522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999815001317, + "compression_loss": 0.0, + "distillation_loss": 0.06962715834379196, + "epoch": 7.96, + "learning_rate": 4.775907352415367e-05, + "loss": 0.0651, + "step": 8377, + "task_loss": 0.0247165709733963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999826324524, + "compression_loss": 0.0, + "distillation_loss": 0.14432743191719055, + "epoch": 7.96, + "learning_rate": 4.7754662202180606e-05, + "loss": 0.1392, + "step": 8378, + "task_loss": 0.0935506820678711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999837176001, + "compression_loss": 0.0, + "distillation_loss": 0.019188987091183662, + "epoch": 7.96, + "learning_rate": 4.7750246746713565e-05, + "loss": 0.0178, + "step": 8379, + "task_loss": 0.005380744114518166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999847565783, + "compression_loss": 0.0, + "distillation_loss": 0.04302148520946503, + "epoch": 7.96, + "learning_rate": 4.7745827158554634e-05, + "loss": 0.071, + "step": 8380, + "task_loss": 0.3228791356086731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999857503908, + "compression_loss": 0.0, + "distillation_loss": 0.033299144357442856, + "epoch": 7.96, + "learning_rate": 4.774140343850666e-05, + "loss": 0.0338, + "step": 8381, + "task_loss": 0.03854818642139435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999867000414, + "compression_loss": 0.0, + "distillation_loss": 0.16388925909996033, + "epoch": 7.96, + "learning_rate": 4.773697558737322e-05, + "loss": 0.1616, + "step": 8382, + "task_loss": 0.14093096554279327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999876065336, + "compression_loss": 0.0, + "distillation_loss": 0.11715799570083618, + "epoch": 7.96, + "learning_rate": 4.773254360595867e-05, + "loss": 0.1141, + "step": 8383, + "task_loss": 0.08705883473157883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999884708712, + "compression_loss": 0.0, + "distillation_loss": 0.07380795478820801, + "epoch": 7.96, + "learning_rate": 4.77281074950681e-05, + "loss": 0.0678, + "step": 8384, + "task_loss": 0.013948189094662666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999892940577, + "compression_loss": 0.0, + "distillation_loss": 0.15698346495628357, + "epoch": 7.96, + "learning_rate": 4.7723667255507334e-05, + "loss": 0.1518, + "step": 8385, + "task_loss": 0.10555059462785721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999990077097, + "compression_loss": 0.0, + "distillation_loss": 0.02515373006463051, + "epoch": 7.96, + "learning_rate": 4.771922288808297e-05, + "loss": 0.0234, + "step": 8386, + "task_loss": 0.007526658475399017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999908209927, + "compression_loss": 0.0, + "distillation_loss": 0.015803663060069084, + "epoch": 7.96, + "learning_rate": 4.771477439360235e-05, + "loss": 0.0145, + "step": 8387, + "task_loss": 0.002915032207965851 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999915267485, + "compression_loss": 0.0, + "distillation_loss": 0.12163722515106201, + "epoch": 7.97, + "learning_rate": 4.7710321772873566e-05, + "loss": 0.1286, + "step": 8388, + "task_loss": 0.19121067225933075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999921953681, + "compression_loss": 0.0, + "distillation_loss": 0.04631570726633072, + "epoch": 7.97, + "learning_rate": 4.770586502670546e-05, + "loss": 0.054, + "step": 8389, + "task_loss": 0.12329075485467911 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999992827855, + "compression_loss": 0.0, + "distillation_loss": 0.15134918689727783, + "epoch": 7.97, + "learning_rate": 4.770140415590762e-05, + "loss": 0.1518, + "step": 8390, + "task_loss": 0.15575477480888367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999934252132, + "compression_loss": 0.0, + "distillation_loss": 0.07986609637737274, + "epoch": 7.97, + "learning_rate": 4.769693916129039e-05, + "loss": 0.0843, + "step": 8391, + "task_loss": 0.12397737801074982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999939884461, + "compression_loss": 0.0, + "distillation_loss": 0.09135753661394119, + "epoch": 7.97, + "learning_rate": 4.769247004366485e-05, + "loss": 0.0998, + "step": 8392, + "task_loss": 0.1758865863084793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999945185575, + "compression_loss": 0.0, + "distillation_loss": 0.029954630881547928, + "epoch": 7.97, + "learning_rate": 4.768799680384283e-05, + "loss": 0.0286, + "step": 8393, + "task_loss": 0.016476107761263847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999950165512, + "compression_loss": 0.0, + "distillation_loss": 0.1209501326084137, + "epoch": 7.97, + "learning_rate": 4.768351944263693e-05, + "loss": 0.1188, + "step": 8394, + "task_loss": 0.09983272105455399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999954834306, + "compression_loss": 0.0, + "distillation_loss": 0.12946045398712158, + "epoch": 7.97, + "learning_rate": 4.767903796086048e-05, + "loss": 0.1457, + "step": 8395, + "task_loss": 0.29160839319229126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999959201997, + "compression_loss": 0.0, + "distillation_loss": 0.03893984854221344, + "epoch": 7.97, + "learning_rate": 4.767455235932756e-05, + "loss": 0.0484, + "step": 8396, + "task_loss": 0.13344644010066986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999963278618, + "compression_loss": 0.0, + "distillation_loss": 0.05474621057510376, + "epoch": 7.97, + "learning_rate": 4.7670062638853e-05, + "loss": 0.0528, + "step": 8397, + "task_loss": 0.03571630269289017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999996707421, + "compression_loss": 0.0, + "distillation_loss": 0.05862941965460777, + "epoch": 7.98, + "learning_rate": 4.766556880025238e-05, + "loss": 0.0592, + "step": 8398, + "task_loss": 0.06433902680873871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999970598807, + "compression_loss": 0.0, + "distillation_loss": 0.022588767111301422, + "epoch": 7.98, + "learning_rate": 4.7661070844342033e-05, + "loss": 0.0212, + "step": 8399, + "task_loss": 0.008752534165978432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999973862446, + "compression_loss": 0.0, + "distillation_loss": 0.0467553474009037, + "epoch": 7.98, + "learning_rate": 4.7656568771939024e-05, + "loss": 0.0514, + "step": 8400, + "task_loss": 0.09337884187698364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999976875165, + "compression_loss": 0.0, + "distillation_loss": 0.11045566201210022, + "epoch": 7.98, + "learning_rate": 4.765206258386119e-05, + "loss": 0.1095, + "step": 8401, + "task_loss": 0.10068619251251221 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999979647, + "compression_loss": 0.0, + "distillation_loss": 0.022844959050416946, + "epoch": 7.98, + "learning_rate": 4.7647552280927086e-05, + "loss": 0.0213, + "step": 8402, + "task_loss": 0.007824547588825226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999982187989, + "compression_loss": 0.0, + "distillation_loss": 0.038698434829711914, + "epoch": 7.98, + "learning_rate": 4.764303786395604e-05, + "loss": 0.0406, + "step": 8403, + "task_loss": 0.05789912864565849 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999984508167, + "compression_loss": 0.0, + "distillation_loss": 0.043944694101810455, + "epoch": 7.98, + "learning_rate": 4.763851933376812e-05, + "loss": 0.0417, + "step": 8404, + "task_loss": 0.02184183895587921 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999986617573, + "compression_loss": 0.0, + "distillation_loss": 0.012151426635682583, + "epoch": 7.98, + "learning_rate": 4.763399669118414e-05, + "loss": 0.0115, + "step": 8405, + "task_loss": 0.005970221012830734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999988526242, + "compression_loss": 0.0, + "distillation_loss": 0.0856751874089241, + "epoch": 7.98, + "learning_rate": 4.762946993702565e-05, + "loss": 0.0912, + "step": 8406, + "task_loss": 0.1414024531841278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999990244211, + "compression_loss": 0.0, + "distillation_loss": 0.059896957129240036, + "epoch": 7.98, + "learning_rate": 4.7624939072114954e-05, + "loss": 0.0598, + "step": 8407, + "task_loss": 0.05849459767341614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999991781517, + "compression_loss": 0.0, + "distillation_loss": 0.1813950538635254, + "epoch": 7.98, + "learning_rate": 4.762040409727512e-05, + "loss": 0.1745, + "step": 8408, + "task_loss": 0.11278204619884491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999993148197, + "compression_loss": 0.0, + "distillation_loss": 0.11629247665405273, + "epoch": 7.99, + "learning_rate": 4.761586501332994e-05, + "loss": 0.1225, + "step": 8409, + "task_loss": 0.17883270978927612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999994354289, + "compression_loss": 0.0, + "distillation_loss": 0.019268091768026352, + "epoch": 7.99, + "learning_rate": 4.7611321821103954e-05, + "loss": 0.0178, + "step": 8410, + "task_loss": 0.0050684306770563126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999995409828, + "compression_loss": 0.0, + "distillation_loss": 0.03275691345334053, + "epoch": 7.99, + "learning_rate": 4.760677452142247e-05, + "loss": 0.0434, + "step": 8411, + "task_loss": 0.13962477445602417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999996324851, + "compression_loss": 0.0, + "distillation_loss": 0.0782676413655281, + "epoch": 7.99, + "learning_rate": 4.760222311511152e-05, + "loss": 0.0755, + "step": 8412, + "task_loss": 0.05032962188124657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999997109396, + "compression_loss": 0.0, + "distillation_loss": 0.03344403952360153, + "epoch": 7.99, + "learning_rate": 4.759766760299788e-05, + "loss": 0.0368, + "step": 8413, + "task_loss": 0.06691578030586243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999997773499, + "compression_loss": 0.0, + "distillation_loss": 0.027902770787477493, + "epoch": 7.99, + "learning_rate": 4.759310798590909e-05, + "loss": 0.0359, + "step": 8414, + "task_loss": 0.10809982568025589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999998327197, + "compression_loss": 0.0, + "distillation_loss": 0.0703706368803978, + "epoch": 7.99, + "learning_rate": 4.758854426467343e-05, + "loss": 0.0753, + "step": 8415, + "task_loss": 0.11971607804298401 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999998780527, + "compression_loss": 0.0, + "distillation_loss": 0.06848819553852081, + "epoch": 7.99, + "learning_rate": 4.758397644011992e-05, + "loss": 0.0691, + "step": 8416, + "task_loss": 0.07492919266223907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999143526, + "compression_loss": 0.0, + "distillation_loss": 0.030622560530900955, + "epoch": 7.99, + "learning_rate": 4.757940451307831e-05, + "loss": 0.0384, + "step": 8417, + "task_loss": 0.10844360291957855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999426228, + "compression_loss": 0.0, + "distillation_loss": 0.06612611562013626, + "epoch": 7.99, + "learning_rate": 4.757482848437914e-05, + "loss": 0.0736, + "step": 8418, + "task_loss": 0.14050063490867615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999638675, + "compression_loss": 0.0, + "distillation_loss": 0.0231521874666214, + "epoch": 8.0, + "learning_rate": 4.7570248354853644e-05, + "loss": 0.0454, + "step": 8419, + "task_loss": 0.2456911951303482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.79999999997909, + "compression_loss": 0.0, + "distillation_loss": 0.027434296905994415, + "epoch": 8.0, + "learning_rate": 4.7565664125333845e-05, + "loss": 0.0334, + "step": 8420, + "task_loss": 0.08753321319818497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.799999999989294, + "compression_loss": 0.0, + "distillation_loss": 0.07552851736545563, + "epoch": 8.0, + "learning_rate": 4.7561075796652464e-05, + "loss": 0.0764, + "step": 8421, + "task_loss": 0.08394578844308853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999954834, + "compression_loss": 0.0, + "distillation_loss": 0.02573383040726185, + "epoch": 8.0, + "learning_rate": 4.755648336964302e-05, + "loss": 0.0239, + "step": 8422, + "task_loss": 0.0073777977377176285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999986618, + "compression_loss": 0.0, + "distillation_loss": 0.06676770746707916, + "epoch": 8.0, + "learning_rate": 4.7551886845139743e-05, + "loss": 0.089, + "step": 8423, + "task_loss": 0.28941696882247925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, + "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999998327, + "compression_loss": 0.0, + "distillation_loss": 0.007792739663273096, + "epoch": 8.0, + "learning_rate": 4.754728622397761e-05, + "loss": 0.0073, + "step": 8424, + "task_loss": 0.0028934250585734844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1277932971715927, + "epoch": 8.0, + "learning_rate": 4.754268150699234e-05, + "loss": 0.12, + "step": 8425, + "task_loss": 0.049523890018463135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.18691036105155945, + "epoch": 8.0, + "learning_rate": 4.753807269502041e-05, + "loss": 0.1797, + "step": 8426, + "task_loss": 0.11476030945777893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11132822930812836, + "epoch": 8.0, + "learning_rate": 4.7533459788899026e-05, + "loss": 0.1066, + "step": 8427, + "task_loss": 0.0645451620221138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06175380200147629, + "epoch": 8.0, + "learning_rate": 4.752884278946614e-05, + "loss": 0.0701, + "step": 8428, + "task_loss": 0.14483240246772766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10747231543064117, + "epoch": 8.0, + "learning_rate": 4.752422169756048e-05, + "loss": 0.1015, + "step": 8429, + "task_loss": 0.0479457788169384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15526169538497925, + "epoch": 8.01, + "learning_rate": 4.7519596514021464e-05, + "loss": 0.1475, + "step": 8430, + "task_loss": 0.07779216766357422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12560060620307922, + "epoch": 8.01, + "learning_rate": 4.751496723968929e-05, + "loss": 0.1216, + "step": 8431, + "task_loss": 0.08531402051448822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06614533811807632, + "epoch": 8.01, + "learning_rate": 4.751033387540488e-05, + "loss": 0.0747, + "step": 8432, + "task_loss": 0.151977077126503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.19305379688739777, + "epoch": 8.01, + "learning_rate": 4.7505696422009904e-05, + "loss": 0.1905, + "step": 8433, + "task_loss": 0.16793277859687805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05545946955680847, + "epoch": 8.01, + "learning_rate": 4.750105488034679e-05, + "loss": 0.0656, + "step": 8434, + "task_loss": 0.15694250166416168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09607134759426117, + "epoch": 8.01, + "learning_rate": 4.749640925125869e-05, + "loss": 0.1036, + "step": 8435, + "task_loss": 0.17179882526397705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036613885313272476, + "epoch": 8.01, + "learning_rate": 4.749175953558951e-05, + "loss": 0.0342, + "step": 8436, + "task_loss": 0.012063302099704742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1419782042503357, + "epoch": 8.01, + "learning_rate": 4.748710573418388e-05, + "loss": 0.1527, + "step": 8437, + "task_loss": 0.24885791540145874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016515720635652542, + "epoch": 8.01, + "learning_rate": 4.7482447847887204e-05, + "loss": 0.0154, + "step": 8438, + "task_loss": 0.00573185458779335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017054930329322815, + "epoch": 8.01, + "learning_rate": 4.747778587754559e-05, + "loss": 0.0218, + "step": 8439, + "task_loss": 0.06488415598869324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03005680814385414, + "epoch": 8.02, + "learning_rate": 4.7473119824005926e-05, + "loss": 0.0275, + "step": 8440, + "task_loss": 0.004504000768065453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05955897271633148, + "epoch": 8.02, + "learning_rate": 4.7468449688115806e-05, + "loss": 0.0621, + "step": 8441, + "task_loss": 0.08468262106180191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03443329036235809, + "epoch": 8.02, + "learning_rate": 4.74637754707236e-05, + "loss": 0.0326, + "step": 8442, + "task_loss": 0.016446424648165703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020430468022823334, + "epoch": 8.02, + "learning_rate": 4.7459097172678386e-05, + "loss": 0.0197, + "step": 8443, + "task_loss": 0.013592688366770744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12642060220241547, + "epoch": 8.02, + "learning_rate": 4.745441479483001e-05, + "loss": 0.1201, + "step": 8444, + "task_loss": 0.06335929036140442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06663849949836731, + "epoch": 8.02, + "learning_rate": 4.744972833802904e-05, + "loss": 0.0631, + "step": 8445, + "task_loss": 0.031001247465610504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.3165062665939331, + "epoch": 8.02, + "learning_rate": 4.74450378031268e-05, + "loss": 0.3055, + "step": 8446, + "task_loss": 0.20671746134757996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07666581869125366, + "epoch": 8.02, + "learning_rate": 4.744034319097535e-05, + "loss": 0.0726, + "step": 8447, + "task_loss": 0.035642966628074646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.050358712673187256, + "epoch": 8.02, + "learning_rate": 4.743564450242749e-05, + "loss": 0.0464, + "step": 8448, + "task_loss": 0.010279197245836258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017902474850416183, + "epoch": 8.02, + "learning_rate": 4.7430941738336745e-05, + "loss": 0.0164, + "step": 8449, + "task_loss": 0.002874387428164482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022135000675916672, + "epoch": 8.02, + "learning_rate": 4.742623489955741e-05, + "loss": 0.0304, + "step": 8450, + "task_loss": 0.10519418120384216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022276829928159714, + "epoch": 8.03, + "learning_rate": 4.74215239869445e-05, + "loss": 0.0206, + "step": 8451, + "task_loss": 0.005232140421867371 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07043208181858063, + "epoch": 8.03, + "learning_rate": 4.741680900135377e-05, + "loss": 0.0705, + "step": 8452, + "task_loss": 0.07124508917331696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0691102147102356, + "epoch": 8.03, + "learning_rate": 4.741208994364173e-05, + "loss": 0.0644, + "step": 8453, + "task_loss": 0.021717606112360954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026366369798779488, + "epoch": 8.03, + "learning_rate": 4.740736681466561e-05, + "loss": 0.0242, + "step": 8454, + "task_loss": 0.00459631159901619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1180252730846405, + "epoch": 8.03, + "learning_rate": 4.74026396152834e-05, + "loss": 0.119, + "step": 8455, + "task_loss": 0.1280374825000763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07965636998414993, + "epoch": 8.03, + "learning_rate": 4.7397908346353796e-05, + "loss": 0.0733, + "step": 8456, + "task_loss": 0.01603274792432785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09866228699684143, + "epoch": 8.03, + "learning_rate": 4.739317300873628e-05, + "loss": 0.0939, + "step": 8457, + "task_loss": 0.05102086067199707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05824364721775055, + "epoch": 8.03, + "learning_rate": 4.738843360329104e-05, + "loss": 0.0556, + "step": 8458, + "task_loss": 0.03139631822705269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0658094584941864, + "epoch": 8.03, + "learning_rate": 4.738369013087902e-05, + "loss": 0.0644, + "step": 8459, + "task_loss": 0.05181333050131798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022087426856160164, + "epoch": 8.03, + "learning_rate": 4.7378942592361876e-05, + "loss": 0.0291, + "step": 8460, + "task_loss": 0.09194046258926392 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013795844279229641, + "epoch": 8.04, + "learning_rate": 4.737419098860204e-05, + "loss": 0.0216, + "step": 8461, + "task_loss": 0.09192723780870438 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13639725744724274, + "epoch": 8.04, + "learning_rate": 4.7369435320462654e-05, + "loss": 0.1467, + "step": 8462, + "task_loss": 0.23894120752811432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07075132429599762, + "epoch": 8.04, + "learning_rate": 4.73646755888076e-05, + "loss": 0.0742, + "step": 8463, + "task_loss": 0.10508999973535538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031757891178131104, + "epoch": 8.04, + "learning_rate": 4.7359911794501526e-05, + "loss": 0.0294, + "step": 8464, + "task_loss": 0.007956236600875854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1145702600479126, + "epoch": 8.04, + "learning_rate": 4.7355143938409785e-05, + "loss": 0.1207, + "step": 8465, + "task_loss": 0.175959050655365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1442631483078003, + "epoch": 8.04, + "learning_rate": 4.735037202139849e-05, + "loss": 0.1416, + "step": 8466, + "task_loss": 0.1179547980427742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05049190670251846, + "epoch": 8.04, + "learning_rate": 4.734559604433447e-05, + "loss": 0.0476, + "step": 8467, + "task_loss": 0.0210769884288311 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021424848586320877, + "epoch": 8.04, + "learning_rate": 4.734081600808531e-05, + "loss": 0.0274, + "step": 8468, + "task_loss": 0.08106733858585358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027790479362010956, + "epoch": 8.04, + "learning_rate": 4.733603191351933e-05, + "loss": 0.0255, + "step": 8469, + "task_loss": 0.004662582650780678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021773390471935272, + "epoch": 8.04, + "learning_rate": 4.733124376150558e-05, + "loss": 0.027, + "step": 8470, + "task_loss": 0.07355040311813354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0780288502573967, + "epoch": 8.04, + "learning_rate": 4.7326451552913856e-05, + "loss": 0.0713, + "step": 8471, + "task_loss": 0.011100053787231445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07223173975944519, + "epoch": 8.05, + "learning_rate": 4.7321655288614674e-05, + "loss": 0.0675, + "step": 8472, + "task_loss": 0.024789290502667427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020628787577152252, + "epoch": 8.05, + "learning_rate": 4.7316854969479314e-05, + "loss": 0.0269, + "step": 8473, + "task_loss": 0.08376266062259674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08520027995109558, + "epoch": 8.05, + "learning_rate": 4.7312050596379764e-05, + "loss": 0.0812, + "step": 8474, + "task_loss": 0.0456637404859066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021483324468135834, + "epoch": 8.05, + "learning_rate": 4.730724217018877e-05, + "loss": 0.0244, + "step": 8475, + "task_loss": 0.050539180636405945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.28899598121643066, + "epoch": 8.05, + "learning_rate": 4.7302429691779806e-05, + "loss": 0.2756, + "step": 8476, + "task_loss": 0.154689222574234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016797779127955437, + "epoch": 8.05, + "learning_rate": 4.729761316202708e-05, + "loss": 0.0163, + "step": 8477, + "task_loss": 0.011920711025595665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02568567357957363, + "epoch": 8.05, + "learning_rate": 4.729279258180553e-05, + "loss": 0.0242, + "step": 8478, + "task_loss": 0.011267339810729027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04046610742807388, + "epoch": 8.05, + "learning_rate": 4.7287967951990855e-05, + "loss": 0.043, + "step": 8479, + "task_loss": 0.06550759077072144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.061091069132089615, + "epoch": 8.05, + "learning_rate": 4.7283139273459445e-05, + "loss": 0.0739, + "step": 8480, + "task_loss": 0.1896328330039978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09908688813447952, + "epoch": 8.05, + "learning_rate": 4.727830654708848e-05, + "loss": 0.0966, + "step": 8481, + "task_loss": 0.07403028011322021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03582286834716797, + "epoch": 8.06, + "learning_rate": 4.727346977375584e-05, + "loss": 0.0419, + "step": 8482, + "task_loss": 0.09690375626087189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015889644622802734, + "epoch": 8.06, + "learning_rate": 4.7268628954340136e-05, + "loss": 0.016, + "step": 8483, + "task_loss": 0.017025936394929886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013576588593423367, + "epoch": 8.06, + "learning_rate": 4.726378408972074e-05, + "loss": 0.0387, + "step": 8484, + "task_loss": 0.26450613141059875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10775915533304214, + "epoch": 8.06, + "learning_rate": 4.725893518077774e-05, + "loss": 0.1018, + "step": 8485, + "task_loss": 0.047988370060920715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04508550837635994, + "epoch": 8.06, + "learning_rate": 4.725408222839197e-05, + "loss": 0.0458, + "step": 8486, + "task_loss": 0.05229977145791054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01839440129697323, + "epoch": 8.06, + "learning_rate": 4.724922523344498e-05, + "loss": 0.0171, + "step": 8487, + "task_loss": 0.0054893046617507935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17300604283809662, + "epoch": 8.06, + "learning_rate": 4.724436419681907e-05, + "loss": 0.165, + "step": 8488, + "task_loss": 0.09258658438920975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2892667055130005, + "epoch": 8.06, + "learning_rate": 4.723949911939728e-05, + "loss": 0.2899, + "step": 8489, + "task_loss": 0.2951303720474243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10540339350700378, + "epoch": 8.06, + "learning_rate": 4.723463000206337e-05, + "loss": 0.102, + "step": 8490, + "task_loss": 0.07126626372337341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04866549000144005, + "epoch": 8.06, + "learning_rate": 4.722975684570183e-05, + "loss": 0.0448, + "step": 8491, + "task_loss": 0.009562673047184944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043563053011894226, + "epoch": 8.06, + "learning_rate": 4.7224879651197905e-05, + "loss": 0.0401, + "step": 8492, + "task_loss": 0.00909213162958622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12174250930547714, + "epoch": 8.07, + "learning_rate": 4.721999841943755e-05, + "loss": 0.1173, + "step": 8493, + "task_loss": 0.07682164013385773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019868122413754463, + "epoch": 8.07, + "learning_rate": 4.721511315130747e-05, + "loss": 0.029, + "step": 8494, + "task_loss": 0.11105664074420929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04917510598897934, + "epoch": 8.07, + "learning_rate": 4.7210223847695104e-05, + "loss": 0.0623, + "step": 8495, + "task_loss": 0.1808261275291443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2701743543148041, + "epoch": 8.07, + "learning_rate": 4.72053305094886e-05, + "loss": 0.2656, + "step": 8496, + "task_loss": 0.22409585118293762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03515856713056564, + "epoch": 8.07, + "learning_rate": 4.720043313757687e-05, + "loss": 0.0465, + "step": 8497, + "task_loss": 0.14875555038452148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.20637600123882294, + "epoch": 8.07, + "learning_rate": 4.719553173284955e-05, + "loss": 0.2016, + "step": 8498, + "task_loss": 0.15902316570281982 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10677587240934372, + "epoch": 8.07, + "learning_rate": 4.719062629619699e-05, + "loss": 0.1103, + "step": 8499, + "task_loss": 0.14183999598026276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11615284532308578, + "epoch": 8.07, + "learning_rate": 4.71857168285103e-05, + "loss": 0.1115, + "step": 8500, + "task_loss": 0.07010817527770996 + }, + { + "epoch": 8.07, + "eval_accuracy": 0.8864678899082569, + "eval_loss": 0.4948354661464691, + "eval_runtime": 18.1296, + "eval_samples_per_second": 48.098, + "eval_steps_per_second": 6.012, + "step": 8500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10728442668914795, + "epoch": 8.07, + "learning_rate": 4.718080333068129e-05, + "loss": 0.1144, + "step": 8501, + "task_loss": 0.17879854142665863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15180592238903046, + "epoch": 8.07, + "learning_rate": 4.717588580360253e-05, + "loss": 0.1669, + "step": 8502, + "task_loss": 0.30242693424224854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03224493935704231, + "epoch": 8.08, + "learning_rate": 4.717096424816731e-05, + "loss": 0.0302, + "step": 8503, + "task_loss": 0.012275317683815956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.18254992365837097, + "epoch": 8.08, + "learning_rate": 4.716603866526967e-05, + "loss": 0.1913, + "step": 8504, + "task_loss": 0.2695552706718445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01585567556321621, + "epoch": 8.08, + "learning_rate": 4.7161109055804356e-05, + "loss": 0.0241, + "step": 8505, + "task_loss": 0.09879221767187119 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017822248861193657, + "epoch": 8.08, + "learning_rate": 4.7156175420666844e-05, + "loss": 0.0263, + "step": 8506, + "task_loss": 0.10284404456615448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05827868729829788, + "epoch": 8.08, + "learning_rate": 4.715123776075336e-05, + "loss": 0.0578, + "step": 8507, + "task_loss": 0.05304684489965439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08329082280397415, + "epoch": 8.08, + "learning_rate": 4.714629607696086e-05, + "loss": 0.0791, + "step": 8508, + "task_loss": 0.041491199284791946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01390088815242052, + "epoch": 8.08, + "learning_rate": 4.714135037018702e-05, + "loss": 0.0129, + "step": 8509, + "task_loss": 0.004111597314476967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07340320199728012, + "epoch": 8.08, + "learning_rate": 4.713640064133025e-05, + "loss": 0.0721, + "step": 8510, + "task_loss": 0.06043552607297897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023112181574106216, + "epoch": 8.08, + "learning_rate": 4.7131446891289694e-05, + "loss": 0.0217, + "step": 8511, + "task_loss": 0.008507607504725456 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.165248841047287, + "epoch": 8.08, + "learning_rate": 4.712648912096522e-05, + "loss": 0.1713, + "step": 8512, + "task_loss": 0.2260192334651947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.18116983771324158, + "epoch": 8.08, + "learning_rate": 4.712152733125744e-05, + "loss": 0.1778, + "step": 8513, + "task_loss": 0.14782485365867615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03632102161645889, + "epoch": 8.09, + "learning_rate": 4.711656152306768e-05, + "loss": 0.0373, + "step": 8514, + "task_loss": 0.04582914710044861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01714969053864479, + "epoch": 8.09, + "learning_rate": 4.711159169729801e-05, + "loss": 0.0161, + "step": 8515, + "task_loss": 0.006688836961984634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13296061754226685, + "epoch": 8.09, + "learning_rate": 4.710661785485121e-05, + "loss": 0.1435, + "step": 8516, + "task_loss": 0.23862020671367645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017725428566336632, + "epoch": 8.09, + "learning_rate": 4.710163999663081e-05, + "loss": 0.0163, + "step": 8517, + "task_loss": 0.003909563645720482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2052067518234253, + "epoch": 8.09, + "learning_rate": 4.709665812354107e-05, + "loss": 0.1971, + "step": 8518, + "task_loss": 0.12428037822246552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022109489887952805, + "epoch": 8.09, + "learning_rate": 4.709167223648695e-05, + "loss": 0.0271, + "step": 8519, + "task_loss": 0.07243168354034424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01610475778579712, + "epoch": 8.09, + "learning_rate": 4.7086682336374187e-05, + "loss": 0.0271, + "step": 8520, + "task_loss": 0.1261139065027237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1497061550617218, + "epoch": 8.09, + "learning_rate": 4.70816884241092e-05, + "loss": 0.1447, + "step": 8521, + "task_loss": 0.09966839849948883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05508185923099518, + "epoch": 8.09, + "learning_rate": 4.7076690500599164e-05, + "loss": 0.0549, + "step": 8522, + "task_loss": 0.05369473248720169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022330323234200478, + "epoch": 8.09, + "learning_rate": 4.707168856675198e-05, + "loss": 0.0208, + "step": 8523, + "task_loss": 0.007413491606712341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03679078072309494, + "epoch": 8.09, + "learning_rate": 4.7066682623476265e-05, + "loss": 0.0414, + "step": 8524, + "task_loss": 0.08316943049430847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016818320378661156, + "epoch": 8.1, + "learning_rate": 4.706167267168138e-05, + "loss": 0.0156, + "step": 8525, + "task_loss": 0.004215966910123825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026049071922898293, + "epoch": 8.1, + "learning_rate": 4.70566587122774e-05, + "loss": 0.0302, + "step": 8526, + "task_loss": 0.06748848408460617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12002403289079666, + "epoch": 8.1, + "learning_rate": 4.7051640746175147e-05, + "loss": 0.1124, + "step": 8527, + "task_loss": 0.04354723170399666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1447361707687378, + "epoch": 8.1, + "learning_rate": 4.7046618774286146e-05, + "loss": 0.1348, + "step": 8528, + "task_loss": 0.04555728659033775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05529174208641052, + "epoch": 8.1, + "learning_rate": 4.7041592797522664e-05, + "loss": 0.0689, + "step": 8529, + "task_loss": 0.19117003679275513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032082926481962204, + "epoch": 8.1, + "learning_rate": 4.7036562816797705e-05, + "loss": 0.0319, + "step": 8530, + "task_loss": 0.030074482783675194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04032071679830551, + "epoch": 8.1, + "learning_rate": 4.7031528833024976e-05, + "loss": 0.0484, + "step": 8531, + "task_loss": 0.12074785679578781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.19668981432914734, + "epoch": 8.1, + "learning_rate": 4.702649084711892e-05, + "loss": 0.1871, + "step": 8532, + "task_loss": 0.10096022486686707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09332388639450073, + "epoch": 8.1, + "learning_rate": 4.7021448859994735e-05, + "loss": 0.0903, + "step": 8533, + "task_loss": 0.06286977231502533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07967086136341095, + "epoch": 8.1, + "learning_rate": 4.70164028725683e-05, + "loss": 0.089, + "step": 8534, + "task_loss": 0.17311781644821167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04477657005190849, + "epoch": 8.11, + "learning_rate": 4.7011352885756255e-05, + "loss": 0.0428, + "step": 8535, + "task_loss": 0.025373056530952454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09086555242538452, + "epoch": 8.11, + "learning_rate": 4.7006298900475954e-05, + "loss": 0.0939, + "step": 8536, + "task_loss": 0.12131273001432419 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021383512765169144, + "epoch": 8.11, + "learning_rate": 4.7001240917645465e-05, + "loss": 0.0204, + "step": 8537, + "task_loss": 0.011899461969733238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11136257648468018, + "epoch": 8.11, + "learning_rate": 4.699617893818361e-05, + "loss": 0.1068, + "step": 8538, + "task_loss": 0.06574281305074692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.052705250680446625, + "epoch": 8.11, + "learning_rate": 4.699111296300992e-05, + "loss": 0.0526, + "step": 8539, + "task_loss": 0.05185743421316147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04489322006702423, + "epoch": 8.11, + "learning_rate": 4.6986042993044645e-05, + "loss": 0.0547, + "step": 8540, + "task_loss": 0.14266639947891235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11219049990177155, + "epoch": 8.11, + "learning_rate": 4.698096902920877e-05, + "loss": 0.1172, + "step": 8541, + "task_loss": 0.1626092791557312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04092048108577728, + "epoch": 8.11, + "learning_rate": 4.6975891072424015e-05, + "loss": 0.041, + "step": 8542, + "task_loss": 0.04180833697319031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023674312978982925, + "epoch": 8.11, + "learning_rate": 4.697080912361281e-05, + "loss": 0.0218, + "step": 8543, + "task_loss": 0.004959875717759132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014916968531906605, + "epoch": 8.11, + "learning_rate": 4.696572318369831e-05, + "loss": 0.0276, + "step": 8544, + "task_loss": 0.14138737320899963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02858028933405876, + "epoch": 8.11, + "learning_rate": 4.696063325360441e-05, + "loss": 0.0384, + "step": 8545, + "task_loss": 0.1269652247428894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1576182246208191, + "epoch": 8.12, + "learning_rate": 4.6955539334255716e-05, + "loss": 0.1567, + "step": 8546, + "task_loss": 0.14879369735717773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.060055363923311234, + "epoch": 8.12, + "learning_rate": 4.6950441426577565e-05, + "loss": 0.0705, + "step": 8547, + "task_loss": 0.16406583786010742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07873189449310303, + "epoch": 8.12, + "learning_rate": 4.694533953149601e-05, + "loss": 0.0885, + "step": 8548, + "task_loss": 0.17685380578041077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015233471989631653, + "epoch": 8.12, + "learning_rate": 4.694023364993784e-05, + "loss": 0.0165, + "step": 8549, + "task_loss": 0.027818040922284126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01854429766535759, + "epoch": 8.12, + "learning_rate": 4.693512378283056e-05, + "loss": 0.0253, + "step": 8550, + "task_loss": 0.08647675812244415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05369843915104866, + "epoch": 8.12, + "learning_rate": 4.693000993110241e-05, + "loss": 0.049, + "step": 8551, + "task_loss": 0.006441434845328331 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0389072448015213, + "epoch": 8.12, + "learning_rate": 4.692489209568234e-05, + "loss": 0.0412, + "step": 8552, + "task_loss": 0.06141179800033569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015452216379344463, + "epoch": 8.12, + "learning_rate": 4.691977027750002e-05, + "loss": 0.0143, + "step": 8553, + "task_loss": 0.004333069548010826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01049085147678852, + "epoch": 8.12, + "learning_rate": 4.691464447748587e-05, + "loss": 0.0097, + "step": 8554, + "task_loss": 0.003054805099964142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08345725387334824, + "epoch": 8.12, + "learning_rate": 4.690951469657101e-05, + "loss": 0.0808, + "step": 8555, + "task_loss": 0.05647343769669533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013090891763567924, + "epoch": 8.13, + "learning_rate": 4.690438093568728e-05, + "loss": 0.0123, + "step": 8556, + "task_loss": 0.005195150151848793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03142661973834038, + "epoch": 8.13, + "learning_rate": 4.689924319576727e-05, + "loss": 0.0294, + "step": 8557, + "task_loss": 0.010683547705411911 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12756206095218658, + "epoch": 8.13, + "learning_rate": 4.689410147774426e-05, + "loss": 0.1162, + "step": 8558, + "task_loss": 0.014250561594963074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04624927043914795, + "epoch": 8.13, + "learning_rate": 4.6888955782552274e-05, + "loss": 0.0421, + "step": 8559, + "task_loss": 0.00475945882499218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025834165513515472, + "epoch": 8.13, + "learning_rate": 4.688380611112605e-05, + "loss": 0.0244, + "step": 8560, + "task_loss": 0.011195512488484383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02918969839811325, + "epoch": 8.13, + "learning_rate": 4.687865246440106e-05, + "loss": 0.0374, + "step": 8561, + "task_loss": 0.11153009533882141 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012696018442511559, + "epoch": 8.13, + "learning_rate": 4.687349484331347e-05, + "loss": 0.0118, + "step": 8562, + "task_loss": 0.0034159980714321136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01882791891694069, + "epoch": 8.13, + "learning_rate": 4.6868333248800204e-05, + "loss": 0.0241, + "step": 8563, + "task_loss": 0.07115612179040909 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1464093029499054, + "epoch": 8.13, + "learning_rate": 4.686316768179889e-05, + "loss": 0.1556, + "step": 8564, + "task_loss": 0.23828445374965668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0876997858285904, + "epoch": 8.13, + "learning_rate": 4.685799814324786e-05, + "loss": 0.091, + "step": 8565, + "task_loss": 0.12048880755901337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2233988344669342, + "epoch": 8.13, + "learning_rate": 4.685282463408621e-05, + "loss": 0.2209, + "step": 8566, + "task_loss": 0.198834627866745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02312629297375679, + "epoch": 8.14, + "learning_rate": 4.6847647155253716e-05, + "loss": 0.0287, + "step": 8567, + "task_loss": 0.0787849947810173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024075020104646683, + "epoch": 8.14, + "learning_rate": 4.684246570769089e-05, + "loss": 0.0264, + "step": 8568, + "task_loss": 0.047648604959249496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1928960680961609, + "epoch": 8.14, + "learning_rate": 4.683728029233898e-05, + "loss": 0.1874, + "step": 8569, + "task_loss": 0.13750508427619934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09075068682432175, + "epoch": 8.14, + "learning_rate": 4.683209091013994e-05, + "loss": 0.0963, + "step": 8570, + "task_loss": 0.1463644802570343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04680275171995163, + "epoch": 8.14, + "learning_rate": 4.682689756203643e-05, + "loss": 0.0503, + "step": 8571, + "task_loss": 0.08158842474222183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02860000915825367, + "epoch": 8.14, + "learning_rate": 4.682170024897187e-05, + "loss": 0.0327, + "step": 8572, + "task_loss": 0.06912478804588318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08807064592838287, + "epoch": 8.14, + "learning_rate": 4.681649897189036e-05, + "loss": 0.0828, + "step": 8573, + "task_loss": 0.03544972091913223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03363920748233795, + "epoch": 8.14, + "learning_rate": 4.681129373173674e-05, + "loss": 0.0422, + "step": 8574, + "task_loss": 0.11956753581762314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04612544924020767, + "epoch": 8.14, + "learning_rate": 4.6806084529456574e-05, + "loss": 0.044, + "step": 8575, + "task_loss": 0.025233760476112366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09824679046869278, + "epoch": 8.14, + "learning_rate": 4.6800871365996135e-05, + "loss": 0.0946, + "step": 8576, + "task_loss": 0.06206507235765457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08123184740543365, + "epoch": 8.15, + "learning_rate": 4.679565424230241e-05, + "loss": 0.0874, + "step": 8577, + "task_loss": 0.143352210521698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03590530529618263, + "epoch": 8.15, + "learning_rate": 4.679043315932313e-05, + "loss": 0.04, + "step": 8578, + "task_loss": 0.07692985236644745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026768725365400314, + "epoch": 8.15, + "learning_rate": 4.6785208118006715e-05, + "loss": 0.0329, + "step": 8579, + "task_loss": 0.08837291598320007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0306844525039196, + "epoch": 8.15, + "learning_rate": 4.677997911930234e-05, + "loss": 0.0395, + "step": 8580, + "task_loss": 0.11881330609321594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03350158408284187, + "epoch": 8.15, + "learning_rate": 4.6774746164159854e-05, + "loss": 0.0419, + "step": 8581, + "task_loss": 0.11749888956546783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16534090042114258, + "epoch": 8.15, + "learning_rate": 4.676950925352986e-05, + "loss": 0.1613, + "step": 8582, + "task_loss": 0.1252642273902893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01491392683237791, + "epoch": 8.15, + "learning_rate": 4.676426838836367e-05, + "loss": 0.0203, + "step": 8583, + "task_loss": 0.06893758475780487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04224181920289993, + "epoch": 8.15, + "learning_rate": 4.675902356961331e-05, + "loss": 0.0482, + "step": 8584, + "task_loss": 0.10220709443092346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024668492376804352, + "epoch": 8.15, + "learning_rate": 4.675377479823153e-05, + "loss": 0.033, + "step": 8585, + "task_loss": 0.10758410394191742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04424036294221878, + "epoch": 8.15, + "learning_rate": 4.6748522075171784e-05, + "loss": 0.0558, + "step": 8586, + "task_loss": 0.15995724499225616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05094079673290253, + "epoch": 8.15, + "learning_rate": 4.674326540138826e-05, + "loss": 0.0489, + "step": 8587, + "task_loss": 0.030478911474347115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14906425774097443, + "epoch": 8.16, + "learning_rate": 4.673800477783587e-05, + "loss": 0.1457, + "step": 8588, + "task_loss": 0.11561848223209381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051018789410591125, + "epoch": 8.16, + "learning_rate": 4.6732740205470206e-05, + "loss": 0.0608, + "step": 8589, + "task_loss": 0.14838165044784546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03625892847776413, + "epoch": 8.16, + "learning_rate": 4.672747168524762e-05, + "loss": 0.0397, + "step": 8590, + "task_loss": 0.07113449275493622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025677144527435303, + "epoch": 8.16, + "learning_rate": 4.672219921812517e-05, + "loss": 0.0392, + "step": 8591, + "task_loss": 0.16079393029212952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08316448330879211, + "epoch": 8.16, + "learning_rate": 4.671692280506061e-05, + "loss": 0.0914, + "step": 8592, + "task_loss": 0.16506913304328918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09451714903116226, + "epoch": 8.16, + "learning_rate": 4.671164244701243e-05, + "loss": 0.0976, + "step": 8593, + "task_loss": 0.12491125613451004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.24787138402462006, + "epoch": 8.16, + "learning_rate": 4.670635814493984e-05, + "loss": 0.2439, + "step": 8594, + "task_loss": 0.20846538245677948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0107728848233819, + "epoch": 8.16, + "learning_rate": 4.6701069899802755e-05, + "loss": 0.01, + "step": 8595, + "task_loss": 0.0030124839395284653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12116492539644241, + "epoch": 8.16, + "learning_rate": 4.669577771256181e-05, + "loss": 0.1183, + "step": 8596, + "task_loss": 0.09291059523820877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034399181604385376, + "epoch": 8.16, + "learning_rate": 4.6690481584178354e-05, + "loss": 0.0317, + "step": 8597, + "task_loss": 0.007727684453129768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0222720205783844, + "epoch": 8.17, + "learning_rate": 4.6685181515614454e-05, + "loss": 0.0206, + "step": 8598, + "task_loss": 0.0058763641864061356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07261856645345688, + "epoch": 8.17, + "learning_rate": 4.6679877507832895e-05, + "loss": 0.0695, + "step": 8599, + "task_loss": 0.041326478123664856 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07069084793329239, + "epoch": 8.17, + "learning_rate": 4.6674569561797174e-05, + "loss": 0.0644, + "step": 8600, + "task_loss": 0.0074882470071315765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037158627063035965, + "epoch": 8.17, + "learning_rate": 4.666925767847151e-05, + "loss": 0.0424, + "step": 8601, + "task_loss": 0.08922519534826279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03928046673536301, + "epoch": 8.17, + "learning_rate": 4.6663941858820825e-05, + "loss": 0.051, + "step": 8602, + "task_loss": 0.15673944354057312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.040694400668144226, + "epoch": 8.17, + "learning_rate": 4.665862210381077e-05, + "loss": 0.0384, + "step": 8603, + "task_loss": 0.01785385236144066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06909865885972977, + "epoch": 8.17, + "learning_rate": 4.66532984144077e-05, + "loss": 0.0671, + "step": 8604, + "task_loss": 0.04940890148282051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027597632259130478, + "epoch": 8.17, + "learning_rate": 4.6647970791578685e-05, + "loss": 0.0295, + "step": 8605, + "task_loss": 0.0467480830848217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015055635944008827, + "epoch": 8.17, + "learning_rate": 4.664263923629153e-05, + "loss": 0.014, + "step": 8606, + "task_loss": 0.0045957863330841064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11752206087112427, + "epoch": 8.17, + "learning_rate": 4.663730374951472e-05, + "loss": 0.1067, + "step": 8607, + "task_loss": 0.009500864893198013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07718470692634583, + "epoch": 8.17, + "learning_rate": 4.663196433221747e-05, + "loss": 0.0789, + "step": 8608, + "task_loss": 0.09422293305397034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027676379308104515, + "epoch": 8.18, + "learning_rate": 4.6626620985369724e-05, + "loss": 0.0254, + "step": 8609, + "task_loss": 0.004925228655338287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05825607106089592, + "epoch": 8.18, + "learning_rate": 4.662127370994212e-05, + "loss": 0.0602, + "step": 8610, + "task_loss": 0.07727469503879547 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029302431270480156, + "epoch": 8.18, + "learning_rate": 4.6615922506906016e-05, + "loss": 0.0288, + "step": 8611, + "task_loss": 0.024653693661093712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1494738757610321, + "epoch": 8.18, + "learning_rate": 4.661056737723349e-05, + "loss": 0.1421, + "step": 8612, + "task_loss": 0.07620874047279358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0703209936618805, + "epoch": 8.18, + "learning_rate": 4.660520832189732e-05, + "loss": 0.0738, + "step": 8613, + "task_loss": 0.10534647107124329 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02900974452495575, + "epoch": 8.18, + "learning_rate": 4.6599845341871005e-05, + "loss": 0.0268, + "step": 8614, + "task_loss": 0.007358167320489883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013132540509104729, + "epoch": 8.18, + "learning_rate": 4.6594478438128757e-05, + "loss": 0.0122, + "step": 8615, + "task_loss": 0.003975642845034599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05000152438879013, + "epoch": 8.18, + "learning_rate": 4.6589107611645497e-05, + "loss": 0.0534, + "step": 8616, + "task_loss": 0.08441969007253647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014517636969685555, + "epoch": 8.18, + "learning_rate": 4.658373286339688e-05, + "loss": 0.0181, + "step": 8617, + "task_loss": 0.050333477556705475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11546745896339417, + "epoch": 8.18, + "learning_rate": 4.6578354194359227e-05, + "loss": 0.1114, + "step": 8618, + "task_loss": 0.07436027377843857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03974412381649017, + "epoch": 8.19, + "learning_rate": 4.657297160550961e-05, + "loss": 0.0471, + "step": 8619, + "task_loss": 0.11349973827600479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02339399978518486, + "epoch": 8.19, + "learning_rate": 4.656758509782582e-05, + "loss": 0.0319, + "step": 8620, + "task_loss": 0.10848347842693329 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03902260586619377, + "epoch": 8.19, + "learning_rate": 4.6562194672286306e-05, + "loss": 0.0458, + "step": 8621, + "task_loss": 0.10702133923768997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029453573748469353, + "epoch": 8.19, + "learning_rate": 4.65568003298703e-05, + "loss": 0.0416, + "step": 8622, + "task_loss": 0.15059900283813477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0717625841498375, + "epoch": 8.19, + "learning_rate": 4.655140207155769e-05, + "loss": 0.0746, + "step": 8623, + "task_loss": 0.09989339113235474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03731407970190048, + "epoch": 8.19, + "learning_rate": 4.65459998983291e-05, + "loss": 0.0491, + "step": 8624, + "task_loss": 0.15505677461624146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0684528648853302, + "epoch": 8.19, + "learning_rate": 4.6540593811165866e-05, + "loss": 0.0718, + "step": 8625, + "task_loss": 0.10180987417697906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.18652784824371338, + "epoch": 8.19, + "learning_rate": 4.653518381105002e-05, + "loss": 0.1783, + "step": 8626, + "task_loss": 0.10443468391895294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0480349101126194, + "epoch": 8.19, + "learning_rate": 4.6529769898964325e-05, + "loss": 0.0462, + "step": 8627, + "task_loss": 0.02956531010568142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09778586030006409, + "epoch": 8.19, + "learning_rate": 4.652435207589224e-05, + "loss": 0.1007, + "step": 8628, + "task_loss": 0.12683331966400146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025784488767385483, + "epoch": 8.19, + "learning_rate": 4.651893034281793e-05, + "loss": 0.0257, + "step": 8629, + "task_loss": 0.024955110624432564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05094904452562332, + "epoch": 8.2, + "learning_rate": 4.6513504700726293e-05, + "loss": 0.065, + "step": 8630, + "task_loss": 0.19132231175899506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10476318001747131, + "epoch": 8.2, + "learning_rate": 4.650807515060291e-05, + "loss": 0.1067, + "step": 8631, + "task_loss": 0.12431478500366211 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17144785821437836, + "epoch": 8.2, + "learning_rate": 4.650264169343411e-05, + "loss": 0.1651, + "step": 8632, + "task_loss": 0.108132004737854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04371756315231323, + "epoch": 8.2, + "learning_rate": 4.6497204330206874e-05, + "loss": 0.0494, + "step": 8633, + "task_loss": 0.10011433064937592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024459153413772583, + "epoch": 8.2, + "learning_rate": 4.649176306190895e-05, + "loss": 0.0227, + "step": 8634, + "task_loss": 0.0073354970663785934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030715011060237885, + "epoch": 8.2, + "learning_rate": 4.648631788952874e-05, + "loss": 0.0359, + "step": 8635, + "task_loss": 0.0821438580751419 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07763121277093887, + "epoch": 8.2, + "learning_rate": 4.6480868814055424e-05, + "loss": 0.0733, + "step": 8636, + "task_loss": 0.0338205024600029 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029433991760015488, + "epoch": 8.2, + "learning_rate": 4.647541583647883e-05, + "loss": 0.0364, + "step": 8637, + "task_loss": 0.09859539568424225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2271520048379898, + "epoch": 8.2, + "learning_rate": 4.646995895778952e-05, + "loss": 0.2236, + "step": 8638, + "task_loss": 0.19156351685523987 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0869775339961052, + "epoch": 8.2, + "learning_rate": 4.646449817897876e-05, + "loss": 0.0906, + "step": 8639, + "task_loss": 0.1231326162815094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024121176451444626, + "epoch": 8.21, + "learning_rate": 4.645903350103855e-05, + "loss": 0.0275, + "step": 8640, + "task_loss": 0.058099135756492615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019324442371726036, + "epoch": 8.21, + "learning_rate": 4.6453564924961544e-05, + "loss": 0.0184, + "step": 8641, + "task_loss": 0.010152887552976608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01817592792212963, + "epoch": 8.21, + "learning_rate": 4.644809245174114e-05, + "loss": 0.0266, + "step": 8642, + "task_loss": 0.10227106511592865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02699386328458786, + "epoch": 8.21, + "learning_rate": 4.6442616082371466e-05, + "loss": 0.0387, + "step": 8643, + "task_loss": 0.14438271522521973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0430140346288681, + "epoch": 8.21, + "learning_rate": 4.64371358178473e-05, + "loss": 0.0394, + "step": 8644, + "task_loss": 0.006518969312310219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03487422317266464, + "epoch": 8.21, + "learning_rate": 4.6431651659164174e-05, + "loss": 0.0456, + "step": 8645, + "task_loss": 0.14229948818683624 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027154915034770966, + "epoch": 8.21, + "learning_rate": 4.6426163607318305e-05, + "loss": 0.0251, + "step": 8646, + "task_loss": 0.006713952869176865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03345032036304474, + "epoch": 8.21, + "learning_rate": 4.642067166330663e-05, + "loss": 0.0407, + "step": 8647, + "task_loss": 0.10630976408720016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030117200687527657, + "epoch": 8.21, + "learning_rate": 4.6415175828126786e-05, + "loss": 0.0276, + "step": 8648, + "task_loss": 0.004965195432305336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08309546858072281, + "epoch": 8.21, + "learning_rate": 4.640967610277711e-05, + "loss": 0.0893, + "step": 8649, + "task_loss": 0.14526864886283875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10631243884563446, + "epoch": 8.21, + "learning_rate": 4.640417248825667e-05, + "loss": 0.102, + "step": 8650, + "task_loss": 0.0636465847492218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04527665674686432, + "epoch": 8.22, + "learning_rate": 4.63986649855652e-05, + "loss": 0.0411, + "step": 8651, + "task_loss": 0.003472359851002693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1377941519021988, + "epoch": 8.22, + "learning_rate": 4.639315359570319e-05, + "loss": 0.1355, + "step": 8652, + "task_loss": 0.11462458968162537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03246547654271126, + "epoch": 8.22, + "learning_rate": 4.6387638319671786e-05, + "loss": 0.0305, + "step": 8653, + "task_loss": 0.013004310429096222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028419774025678635, + "epoch": 8.22, + "learning_rate": 4.6382119158472895e-05, + "loss": 0.0263, + "step": 8654, + "task_loss": 0.006932957097887993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017502637580037117, + "epoch": 8.22, + "learning_rate": 4.637659611310907e-05, + "loss": 0.0174, + "step": 8655, + "task_loss": 0.01657920889556408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13791508972644806, + "epoch": 8.22, + "learning_rate": 4.637106918458361e-05, + "loss": 0.1373, + "step": 8656, + "task_loss": 0.1319553405046463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.055541716516017914, + "epoch": 8.22, + "learning_rate": 4.636553837390051e-05, + "loss": 0.0648, + "step": 8657, + "task_loss": 0.14844538271427155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10020337253808975, + "epoch": 8.22, + "learning_rate": 4.636000368206447e-05, + "loss": 0.0989, + "step": 8658, + "task_loss": 0.08745051920413971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09979698061943054, + "epoch": 8.22, + "learning_rate": 4.6354465110080885e-05, + "loss": 0.0986, + "step": 8659, + "task_loss": 0.08759049326181412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12549327313899994, + "epoch": 8.22, + "learning_rate": 4.6348922658955874e-05, + "loss": 0.1235, + "step": 8660, + "task_loss": 0.10560569912195206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11727667599916458, + "epoch": 8.23, + "learning_rate": 4.634337632969624e-05, + "loss": 0.1162, + "step": 8661, + "task_loss": 0.10662281513214111 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04554348438978195, + "epoch": 8.23, + "learning_rate": 4.6337826123309505e-05, + "loss": 0.0446, + "step": 8662, + "task_loss": 0.035856109112501144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.052027251571416855, + "epoch": 8.23, + "learning_rate": 4.6332272040803895e-05, + "loss": 0.0485, + "step": 8663, + "task_loss": 0.016855746507644653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0639544203877449, + "epoch": 8.23, + "learning_rate": 4.632671408318833e-05, + "loss": 0.0608, + "step": 8664, + "task_loss": 0.03275006264448166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07394473999738693, + "epoch": 8.23, + "learning_rate": 4.6321152251472435e-05, + "loss": 0.081, + "step": 8665, + "task_loss": 0.14465902745723724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07188697159290314, + "epoch": 8.23, + "learning_rate": 4.6315586546666556e-05, + "loss": 0.0734, + "step": 8666, + "task_loss": 0.08725833147764206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023782558739185333, + "epoch": 8.23, + "learning_rate": 4.631001696978172e-05, + "loss": 0.0278, + "step": 8667, + "task_loss": 0.06402748823165894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032316289842128754, + "epoch": 8.23, + "learning_rate": 4.630444352182968e-05, + "loss": 0.0384, + "step": 8668, + "task_loss": 0.09317083656787872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030993333086371422, + "epoch": 8.23, + "learning_rate": 4.6298866203822865e-05, + "loss": 0.0293, + "step": 8669, + "task_loss": 0.01442483440041542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11551865190267563, + "epoch": 8.23, + "learning_rate": 4.629328501677442e-05, + "loss": 0.1106, + "step": 8670, + "task_loss": 0.06652466952800751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10263466089963913, + "epoch": 8.23, + "learning_rate": 4.6287699961698214e-05, + "loss": 0.0992, + "step": 8671, + "task_loss": 0.06787226349115372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04704195261001587, + "epoch": 8.24, + "learning_rate": 4.6282111039608784e-05, + "loss": 0.0512, + "step": 8672, + "task_loss": 0.08842041343450546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11540669202804565, + "epoch": 8.24, + "learning_rate": 4.6276518251521384e-05, + "loss": 0.1093, + "step": 8673, + "task_loss": 0.05394323915243149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019262634217739105, + "epoch": 8.24, + "learning_rate": 4.6270921598451974e-05, + "loss": 0.0268, + "step": 8674, + "task_loss": 0.09500335156917572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06018088012933731, + "epoch": 8.24, + "learning_rate": 4.6265321081417223e-05, + "loss": 0.0591, + "step": 8675, + "task_loss": 0.049118801951408386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13014157116413116, + "epoch": 8.24, + "learning_rate": 4.625971670143447e-05, + "loss": 0.1193, + "step": 8676, + "task_loss": 0.02204965241253376 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024654846638441086, + "epoch": 8.24, + "learning_rate": 4.625410845952181e-05, + "loss": 0.023, + "step": 8677, + "task_loss": 0.007745366543531418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02617044746875763, + "epoch": 8.24, + "learning_rate": 4.6248496356697966e-05, + "loss": 0.0267, + "step": 8678, + "task_loss": 0.03177504613995552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022578658536076546, + "epoch": 8.24, + "learning_rate": 4.6242880393982436e-05, + "loss": 0.0278, + "step": 8679, + "task_loss": 0.07478535175323486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06433162093162537, + "epoch": 8.24, + "learning_rate": 4.623726057239537e-05, + "loss": 0.0829, + "step": 8680, + "task_loss": 0.24952758848667145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028324808925390244, + "epoch": 8.24, + "learning_rate": 4.623163689295764e-05, + "loss": 0.0271, + "step": 8681, + "task_loss": 0.01628146879374981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03707346320152283, + "epoch": 8.25, + "learning_rate": 4.6226009356690825e-05, + "loss": 0.0343, + "step": 8682, + "task_loss": 0.009730465710163116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0707152709364891, + "epoch": 8.25, + "learning_rate": 4.622037796461718e-05, + "loss": 0.0825, + "step": 8683, + "task_loss": 0.18847694993019104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015443956479430199, + "epoch": 8.25, + "learning_rate": 4.621474271775968e-05, + "loss": 0.0152, + "step": 8684, + "task_loss": 0.013239886611700058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0768551155924797, + "epoch": 8.25, + "learning_rate": 4.620910361714199e-05, + "loss": 0.0735, + "step": 8685, + "task_loss": 0.043199822306632996 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04744374752044678, + "epoch": 8.25, + "learning_rate": 4.620346066378849e-05, + "loss": 0.0446, + "step": 8686, + "task_loss": 0.018627936020493507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03644431009888649, + "epoch": 8.25, + "learning_rate": 4.619781385872424e-05, + "loss": 0.0407, + "step": 8687, + "task_loss": 0.07931828498840332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028153453022241592, + "epoch": 8.25, + "learning_rate": 4.6192163202975013e-05, + "loss": 0.0388, + "step": 8688, + "task_loss": 0.1342998743057251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02188189886510372, + "epoch": 8.25, + "learning_rate": 4.618650869756728e-05, + "loss": 0.0249, + "step": 8689, + "task_loss": 0.0519079715013504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1566876918077469, + "epoch": 8.25, + "learning_rate": 4.6180850343528205e-05, + "loss": 0.1496, + "step": 8690, + "task_loss": 0.08567800372838974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04210164025425911, + "epoch": 8.25, + "learning_rate": 4.617518814188565e-05, + "loss": 0.0436, + "step": 8691, + "task_loss": 0.0568234808743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019453078508377075, + "epoch": 8.25, + "learning_rate": 4.6169522093668196e-05, + "loss": 0.0268, + "step": 8692, + "task_loss": 0.09267456084489822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028510289266705513, + "epoch": 8.26, + "learning_rate": 4.61638521999051e-05, + "loss": 0.0422, + "step": 8693, + "task_loss": 0.16586299240589142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09608705341815948, + "epoch": 8.26, + "learning_rate": 4.6158178461626323e-05, + "loss": 0.0922, + "step": 8694, + "task_loss": 0.057251427322626114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035421546548604965, + "epoch": 8.26, + "learning_rate": 4.615250087986254e-05, + "loss": 0.0374, + "step": 8695, + "task_loss": 0.05525943636894226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03646400570869446, + "epoch": 8.26, + "learning_rate": 4.6146819455645086e-05, + "loss": 0.0426, + "step": 8696, + "task_loss": 0.09792999178171158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08208326995372772, + "epoch": 8.26, + "learning_rate": 4.614113419000604e-05, + "loss": 0.0778, + "step": 8697, + "task_loss": 0.0388401597738266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01685364544391632, + "epoch": 8.26, + "learning_rate": 4.613544508397815e-05, + "loss": 0.0169, + "step": 8698, + "task_loss": 0.017076315358281136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02594437077641487, + "epoch": 8.26, + "learning_rate": 4.6129752138594874e-05, + "loss": 0.0407, + "step": 8699, + "task_loss": 0.17328821122646332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02614816278219223, + "epoch": 8.26, + "learning_rate": 4.612405535489036e-05, + "loss": 0.0368, + "step": 8700, + "task_loss": 0.1322673261165619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037083324044942856, + "epoch": 8.26, + "learning_rate": 4.611835473389945e-05, + "loss": 0.034, + "step": 8701, + "task_loss": 0.005980234593153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019921965897083282, + "epoch": 8.26, + "learning_rate": 4.61126502766577e-05, + "loss": 0.0183, + "step": 8702, + "task_loss": 0.0037397872656583786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07418224960565567, + "epoch": 8.26, + "learning_rate": 4.6106941984201344e-05, + "loss": 0.0676, + "step": 8703, + "task_loss": 0.008437564596533775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0171127300709486, + "epoch": 8.27, + "learning_rate": 4.610122985756733e-05, + "loss": 0.0281, + "step": 8704, + "task_loss": 0.12655523419380188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041877150535583496, + "epoch": 8.27, + "learning_rate": 4.609551389779328e-05, + "loss": 0.0504, + "step": 8705, + "task_loss": 0.12753605842590332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09651815891265869, + "epoch": 8.27, + "learning_rate": 4.6089794105917544e-05, + "loss": 0.0924, + "step": 8706, + "task_loss": 0.05498852580785751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026990918442606926, + "epoch": 8.27, + "learning_rate": 4.6084070482979135e-05, + "loss": 0.0247, + "step": 8707, + "task_loss": 0.00403929129242897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01692625693976879, + "epoch": 8.27, + "learning_rate": 4.607834303001778e-05, + "loss": 0.0239, + "step": 8708, + "task_loss": 0.08626563102006912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02325316146016121, + "epoch": 8.27, + "learning_rate": 4.60726117480739e-05, + "loss": 0.0215, + "step": 8709, + "task_loss": 0.005591306835412979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012971121817827225, + "epoch": 8.27, + "learning_rate": 4.6066876638188604e-05, + "loss": 0.0121, + "step": 8710, + "task_loss": 0.0038487426936626434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10507965087890625, + "epoch": 8.27, + "learning_rate": 4.606113770140371e-05, + "loss": 0.1015, + "step": 8711, + "task_loss": 0.06916863471269608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04460505023598671, + "epoch": 8.27, + "learning_rate": 4.605539493876173e-05, + "loss": 0.0626, + "step": 8712, + "task_loss": 0.224493145942688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13093966245651245, + "epoch": 8.27, + "learning_rate": 4.604964835130585e-05, + "loss": 0.1438, + "step": 8713, + "task_loss": 0.2592054605484009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.054761841893196106, + "epoch": 8.28, + "learning_rate": 4.6043897940079964e-05, + "loss": 0.0584, + "step": 8714, + "task_loss": 0.0908980518579483 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10678750276565552, + "epoch": 8.28, + "learning_rate": 4.603814370612867e-05, + "loss": 0.1026, + "step": 8715, + "task_loss": 0.06508254259824753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04329557716846466, + "epoch": 8.28, + "learning_rate": 4.603238565049726e-05, + "loss": 0.047, + "step": 8716, + "task_loss": 0.08073478937149048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01392638310790062, + "epoch": 8.28, + "learning_rate": 4.60266237742317e-05, + "loss": 0.013, + "step": 8717, + "task_loss": 0.00464806891977787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1740546077489853, + "epoch": 8.28, + "learning_rate": 4.602085807837866e-05, + "loss": 0.1735, + "step": 8718, + "task_loss": 0.1681438386440277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1236797422170639, + "epoch": 8.28, + "learning_rate": 4.601508856398552e-05, + "loss": 0.1305, + "step": 8719, + "task_loss": 0.19167181849479675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043940044939517975, + "epoch": 8.28, + "learning_rate": 4.6009315232100324e-05, + "loss": 0.0636, + "step": 8720, + "task_loss": 0.2409691959619522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09241293370723724, + "epoch": 8.28, + "learning_rate": 4.600353808377184e-05, + "loss": 0.0951, + "step": 8721, + "task_loss": 0.119747593998909 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020489653572440147, + "epoch": 8.28, + "learning_rate": 4.599775712004951e-05, + "loss": 0.0191, + "step": 8722, + "task_loss": 0.0063454341143369675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09082825481891632, + "epoch": 8.28, + "learning_rate": 4.599197234198347e-05, + "loss": 0.0912, + "step": 8723, + "task_loss": 0.09437683969736099 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029831204563379288, + "epoch": 8.28, + "learning_rate": 4.5986183750624555e-05, + "loss": 0.0506, + "step": 8724, + "task_loss": 0.23745323717594147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06204962357878685, + "epoch": 8.29, + "learning_rate": 4.5980391347024296e-05, + "loss": 0.062, + "step": 8725, + "task_loss": 0.061779238283634186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03659776970744133, + "epoch": 8.29, + "learning_rate": 4.59745951322349e-05, + "loss": 0.0351, + "step": 8726, + "task_loss": 0.02147502824664116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024865522980690002, + "epoch": 8.29, + "learning_rate": 4.596879510730929e-05, + "loss": 0.0345, + "step": 8727, + "task_loss": 0.1214059591293335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04416849464178085, + "epoch": 8.29, + "learning_rate": 4.596299127330106e-05, + "loss": 0.0599, + "step": 8728, + "task_loss": 0.20144742727279663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06300773471593857, + "epoch": 8.29, + "learning_rate": 4.59571836312645e-05, + "loss": 0.0675, + "step": 8729, + "task_loss": 0.10772567242383957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03947564959526062, + "epoch": 8.29, + "learning_rate": 4.595137218225461e-05, + "loss": 0.0452, + "step": 8730, + "task_loss": 0.09653180092573166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021150944754481316, + "epoch": 8.29, + "learning_rate": 4.594555692732706e-05, + "loss": 0.0268, + "step": 8731, + "task_loss": 0.07796701788902283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17825676500797272, + "epoch": 8.29, + "learning_rate": 4.593973786753821e-05, + "loss": 0.1831, + "step": 8732, + "task_loss": 0.22676941752433777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012715199962258339, + "epoch": 8.29, + "learning_rate": 4.593391500394514e-05, + "loss": 0.0187, + "step": 8733, + "task_loss": 0.07283098250627518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13977791368961334, + "epoch": 8.29, + "learning_rate": 4.5928088337605586e-05, + "loss": 0.1398, + "step": 8734, + "task_loss": 0.1397220343351364 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14722847938537598, + "epoch": 8.3, + "learning_rate": 4.5922257869578e-05, + "loss": 0.1469, + "step": 8735, + "task_loss": 0.14371134340763092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15708275139331818, + "epoch": 8.3, + "learning_rate": 4.5916423600921496e-05, + "loss": 0.1519, + "step": 8736, + "task_loss": 0.10510636121034622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057722412049770355, + "epoch": 8.3, + "learning_rate": 4.591058553269593e-05, + "loss": 0.0712, + "step": 8737, + "task_loss": 0.1927632987499237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04139891266822815, + "epoch": 8.3, + "learning_rate": 4.590474366596178e-05, + "loss": 0.0431, + "step": 8738, + "task_loss": 0.05822301283478737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021710364148020744, + "epoch": 8.3, + "learning_rate": 4.589889800178026e-05, + "loss": 0.0253, + "step": 8739, + "task_loss": 0.05777839943766594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022335700690746307, + "epoch": 8.3, + "learning_rate": 4.589304854121329e-05, + "loss": 0.0211, + "step": 8740, + "task_loss": 0.009688341990113258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026100587099790573, + "epoch": 8.3, + "learning_rate": 4.588719528532342e-05, + "loss": 0.0327, + "step": 8741, + "task_loss": 0.09173382818698883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12068310379981995, + "epoch": 8.3, + "learning_rate": 4.588133823517392e-05, + "loss": 0.115, + "step": 8742, + "task_loss": 0.06337210536003113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025651680305600166, + "epoch": 8.3, + "learning_rate": 4.587547739182878e-05, + "loss": 0.0238, + "step": 8743, + "task_loss": 0.006972752511501312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11778855323791504, + "epoch": 8.3, + "learning_rate": 4.586961275635263e-05, + "loss": 0.1207, + "step": 8744, + "task_loss": 0.1473734825849533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14205443859100342, + "epoch": 8.3, + "learning_rate": 4.586374432981081e-05, + "loss": 0.1341, + "step": 8745, + "task_loss": 0.06203455477952957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018492717295885086, + "epoch": 8.31, + "learning_rate": 4.585787211326935e-05, + "loss": 0.0172, + "step": 8746, + "task_loss": 0.0057936906814575195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10602115839719772, + "epoch": 8.31, + "learning_rate": 4.5851996107794975e-05, + "loss": 0.1199, + "step": 8747, + "task_loss": 0.24453692138195038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022601492702960968, + "epoch": 8.31, + "learning_rate": 4.584611631445508e-05, + "loss": 0.0279, + "step": 8748, + "task_loss": 0.07518120110034943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05565394088625908, + "epoch": 8.31, + "learning_rate": 4.5840232734317754e-05, + "loss": 0.0597, + "step": 8749, + "task_loss": 0.09581439942121506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035259947180747986, + "epoch": 8.31, + "learning_rate": 4.583434536845179e-05, + "loss": 0.0394, + "step": 8750, + "task_loss": 0.07706654071807861 + }, + { + "epoch": 8.31, + "eval_accuracy": 0.8956422018348624, + "eval_loss": 0.4291454553604126, + "eval_runtime": 17.9719, + "eval_samples_per_second": 48.52, + "eval_steps_per_second": 6.065, + "step": 8750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06929297000169754, + "epoch": 8.31, + "learning_rate": 4.5828454217926654e-05, + "loss": 0.0863, + "step": 8751, + "task_loss": 0.23925699293613434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06375081837177277, + "epoch": 8.31, + "learning_rate": 4.5822559283812496e-05, + "loss": 0.0626, + "step": 8752, + "task_loss": 0.05265332758426666 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11460275948047638, + "epoch": 8.31, + "learning_rate": 4.581666056718016e-05, + "loss": 0.1133, + "step": 8753, + "task_loss": 0.10144174098968506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0539216548204422, + "epoch": 8.31, + "learning_rate": 4.5810758069101175e-05, + "loss": 0.052, + "step": 8754, + "task_loss": 0.034788183867931366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030059363692998886, + "epoch": 8.31, + "learning_rate": 4.580485179064777e-05, + "loss": 0.0336, + "step": 8755, + "task_loss": 0.06580464541912079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1027214527130127, + "epoch": 8.32, + "learning_rate": 4.579894173289284e-05, + "loss": 0.1106, + "step": 8756, + "task_loss": 0.18194906413555145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08241204172372818, + "epoch": 8.32, + "learning_rate": 4.579302789690997e-05, + "loss": 0.0806, + "step": 8757, + "task_loss": 0.0640886202454567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016954604536294937, + "epoch": 8.32, + "learning_rate": 4.578711028377344e-05, + "loss": 0.0219, + "step": 8758, + "task_loss": 0.06677691638469696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02651621401309967, + "epoch": 8.32, + "learning_rate": 4.578118889455821e-05, + "loss": 0.0253, + "step": 8759, + "task_loss": 0.014431167393922806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03452229127287865, + "epoch": 8.32, + "learning_rate": 4.577526373033994e-05, + "loss": 0.0328, + "step": 8760, + "task_loss": 0.017721977084875107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08960497379302979, + "epoch": 8.32, + "learning_rate": 4.576933479219496e-05, + "loss": 0.0924, + "step": 8761, + "task_loss": 0.11729642748832703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07891800254583359, + "epoch": 8.32, + "learning_rate": 4.5763402081200294e-05, + "loss": 0.0769, + "step": 8762, + "task_loss": 0.05859693884849548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02912452071905136, + "epoch": 8.32, + "learning_rate": 4.575746559843364e-05, + "loss": 0.0273, + "step": 8763, + "task_loss": 0.010460572317242622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028819631785154343, + "epoch": 8.32, + "learning_rate": 4.5751525344973384e-05, + "loss": 0.0329, + "step": 8764, + "task_loss": 0.06981261819601059 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05606947839260101, + "epoch": 8.32, + "learning_rate": 4.5745581321898615e-05, + "loss": 0.0654, + "step": 8765, + "task_loss": 0.14961570501327515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019708652049303055, + "epoch": 8.32, + "learning_rate": 4.5739633530289085e-05, + "loss": 0.0288, + "step": 8766, + "task_loss": 0.1107863336801529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043510615825653076, + "epoch": 8.33, + "learning_rate": 4.573368197122524e-05, + "loss": 0.0518, + "step": 8767, + "task_loss": 0.12616392970085144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04988428205251694, + "epoch": 8.33, + "learning_rate": 4.572772664578821e-05, + "loss": 0.0522, + "step": 8768, + "task_loss": 0.07312482595443726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047488268464803696, + "epoch": 8.33, + "learning_rate": 4.572176755505981e-05, + "loss": 0.0509, + "step": 8769, + "task_loss": 0.08157598972320557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05487481504678726, + "epoch": 8.33, + "learning_rate": 4.571580470012254e-05, + "loss": 0.0548, + "step": 8770, + "task_loss": 0.05453791469335556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02108554169535637, + "epoch": 8.33, + "learning_rate": 4.5709838082059574e-05, + "loss": 0.0193, + "step": 8771, + "task_loss": 0.002928614616394043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09459959715604782, + "epoch": 8.33, + "learning_rate": 4.570386770195478e-05, + "loss": 0.0858, + "step": 8772, + "task_loss": 0.006600510329008102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028554365038871765, + "epoch": 8.33, + "learning_rate": 4.569789356089271e-05, + "loss": 0.0308, + "step": 8773, + "task_loss": 0.05108209699392319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051416561007499695, + "epoch": 8.33, + "learning_rate": 4.569191565995859e-05, + "loss": 0.0613, + "step": 8774, + "task_loss": 0.15025511384010315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020000001415610313, + "epoch": 8.33, + "learning_rate": 4.568593400023834e-05, + "loss": 0.0228, + "step": 8775, + "task_loss": 0.047657888382673264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03664660453796387, + "epoch": 8.33, + "learning_rate": 4.567994858281855e-05, + "loss": 0.0338, + "step": 8776, + "task_loss": 0.008631901815533638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08228196948766708, + "epoch": 8.34, + "learning_rate": 4.56739594087865e-05, + "loss": 0.0783, + "step": 8777, + "task_loss": 0.042297106236219406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035452522337436676, + "epoch": 8.34, + "learning_rate": 4.566796647923017e-05, + "loss": 0.0449, + "step": 8778, + "task_loss": 0.13000640273094177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011653019115328789, + "epoch": 8.34, + "learning_rate": 4.566196979523818e-05, + "loss": 0.0197, + "step": 8779, + "task_loss": 0.09172951430082321 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08766936510801315, + "epoch": 8.34, + "learning_rate": 4.5655969357899874e-05, + "loss": 0.0842, + "step": 8780, + "task_loss": 0.05248169228434563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08307437598705292, + "epoch": 8.34, + "learning_rate": 4.564996516830525e-05, + "loss": 0.0812, + "step": 8781, + "task_loss": 0.06468552350997925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01311748381704092, + "epoch": 8.34, + "learning_rate": 4.564395722754501e-05, + "loss": 0.0121, + "step": 8782, + "task_loss": 0.0029160063713788986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022205941379070282, + "epoch": 8.34, + "learning_rate": 4.56379455367105e-05, + "loss": 0.0262, + "step": 8783, + "task_loss": 0.061669085174798965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029047932475805283, + "epoch": 8.34, + "learning_rate": 4.563193009689381e-05, + "loss": 0.0371, + "step": 8784, + "task_loss": 0.10945844650268555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019876539707183838, + "epoch": 8.34, + "learning_rate": 4.562591090918764e-05, + "loss": 0.0275, + "step": 8785, + "task_loss": 0.09562841057777405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01689576543867588, + "epoch": 8.34, + "learning_rate": 4.561988797468542e-05, + "loss": 0.0234, + "step": 8786, + "task_loss": 0.08180932700634003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023341480642557144, + "epoch": 8.34, + "learning_rate": 4.561386129448125e-05, + "loss": 0.0305, + "step": 8787, + "task_loss": 0.09533084183931351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015885740518569946, + "epoch": 8.35, + "learning_rate": 4.5607830869669885e-05, + "loss": 0.0212, + "step": 8788, + "task_loss": 0.06913591921329498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026201194152235985, + "epoch": 8.35, + "learning_rate": 4.560179670134681e-05, + "loss": 0.025, + "step": 8789, + "task_loss": 0.013938097283244133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08920443058013916, + "epoch": 8.35, + "learning_rate": 4.559575879060813e-05, + "loss": 0.0968, + "step": 8790, + "task_loss": 0.16520185768604279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1296762377023697, + "epoch": 8.35, + "learning_rate": 4.5589717138550685e-05, + "loss": 0.1242, + "step": 8791, + "task_loss": 0.07465289533138275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016865486279129982, + "epoch": 8.35, + "learning_rate": 4.5583671746271964e-05, + "loss": 0.0156, + "step": 8792, + "task_loss": 0.00429266132414341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030185125768184662, + "epoch": 8.35, + "learning_rate": 4.557762261487013e-05, + "loss": 0.0461, + "step": 8793, + "task_loss": 0.18932506442070007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039688460528850555, + "epoch": 8.35, + "learning_rate": 4.557156974544404e-05, + "loss": 0.037, + "step": 8794, + "task_loss": 0.013088133186101913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06738491356372833, + "epoch": 8.35, + "learning_rate": 4.5565513139093244e-05, + "loss": 0.0756, + "step": 8795, + "task_loss": 0.1495150625705719 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05773467570543289, + "epoch": 8.35, + "learning_rate": 4.5559452796917936e-05, + "loss": 0.0613, + "step": 8796, + "task_loss": 0.09331423789262772 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026063233613967896, + "epoch": 8.35, + "learning_rate": 4.555338872001901e-05, + "loss": 0.0311, + "step": 8797, + "task_loss": 0.07620520889759064 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018153179436922073, + "epoch": 8.36, + "learning_rate": 4.554732090949805e-05, + "loss": 0.0175, + "step": 8798, + "task_loss": 0.011946845799684525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03151557967066765, + "epoch": 8.36, + "learning_rate": 4.5541249366457276e-05, + "loss": 0.0288, + "step": 8799, + "task_loss": 0.0040736570954322815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01914232410490513, + "epoch": 8.36, + "learning_rate": 4.5535174091999636e-05, + "loss": 0.0221, + "step": 8800, + "task_loss": 0.04857531934976578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023833172395825386, + "epoch": 8.36, + "learning_rate": 4.552909508722871e-05, + "loss": 0.0246, + "step": 8801, + "task_loss": 0.03120681270956993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021301060914993286, + "epoch": 8.36, + "learning_rate": 4.55230123532488e-05, + "loss": 0.0197, + "step": 8802, + "task_loss": 0.005311897024512291 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034260865300893784, + "epoch": 8.36, + "learning_rate": 4.551692589116486e-05, + "loss": 0.0544, + "step": 8803, + "task_loss": 0.23611676692962646 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036078743636608124, + "epoch": 8.36, + "learning_rate": 4.551083570208252e-05, + "loss": 0.0371, + "step": 8804, + "task_loss": 0.0467095673084259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14268112182617188, + "epoch": 8.36, + "learning_rate": 4.550474178710809e-05, + "loss": 0.1415, + "step": 8805, + "task_loss": 0.1312038004398346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014835448935627937, + "epoch": 8.36, + "learning_rate": 4.549864414734856e-05, + "loss": 0.0139, + "step": 8806, + "task_loss": 0.005272580310702324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06314484775066376, + "epoch": 8.36, + "learning_rate": 4.54925427839116e-05, + "loss": 0.0702, + "step": 8807, + "task_loss": 0.13396084308624268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09572551399469376, + "epoch": 8.36, + "learning_rate": 4.548643769790556e-05, + "loss": 0.0963, + "step": 8808, + "task_loss": 0.10133033990859985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017699236050248146, + "epoch": 8.37, + "learning_rate": 4.548032889043944e-05, + "loss": 0.0374, + "step": 8809, + "task_loss": 0.21495309472084045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05119137093424797, + "epoch": 8.37, + "learning_rate": 4.547421636262294e-05, + "loss": 0.05, + "step": 8810, + "task_loss": 0.039462827146053314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024145277217030525, + "epoch": 8.37, + "learning_rate": 4.546810011556644e-05, + "loss": 0.0334, + "step": 8811, + "task_loss": 0.11649482697248459 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10837046056985855, + "epoch": 8.37, + "learning_rate": 4.546198015038097e-05, + "loss": 0.1182, + "step": 8812, + "task_loss": 0.20654115080833435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10691244900226593, + "epoch": 8.37, + "learning_rate": 4.545585646817826e-05, + "loss": 0.1056, + "step": 8813, + "task_loss": 0.09399950504302979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10578563064336777, + "epoch": 8.37, + "learning_rate": 4.544972907007071e-05, + "loss": 0.1143, + "step": 8814, + "task_loss": 0.1912904679775238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07613591104745865, + "epoch": 8.37, + "learning_rate": 4.544359795717139e-05, + "loss": 0.0696, + "step": 8815, + "task_loss": 0.010670226067304611 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03094104677438736, + "epoch": 8.37, + "learning_rate": 4.543746313059404e-05, + "loss": 0.0365, + "step": 8816, + "task_loss": 0.08633188903331757 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0631537064909935, + "epoch": 8.37, + "learning_rate": 4.5431324591453094e-05, + "loss": 0.0812, + "step": 8817, + "task_loss": 0.2431773990392685 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034764595329761505, + "epoch": 8.37, + "learning_rate": 4.5425182340863626e-05, + "loss": 0.0319, + "step": 8818, + "task_loss": 0.006121266633272171 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035581640899181366, + "epoch": 8.38, + "learning_rate": 4.541903637994142e-05, + "loss": 0.033, + "step": 8819, + "task_loss": 0.009517015889286995 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08049920201301575, + "epoch": 8.38, + "learning_rate": 4.541288670980291e-05, + "loss": 0.0904, + "step": 8820, + "task_loss": 0.17998726665973663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06685604900121689, + "epoch": 8.38, + "learning_rate": 4.540673333156523e-05, + "loss": 0.0659, + "step": 8821, + "task_loss": 0.057141952216625214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0435507632791996, + "epoch": 8.38, + "learning_rate": 4.540057624634616e-05, + "loss": 0.0404, + "step": 8822, + "task_loss": 0.01237713173031807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03415733203291893, + "epoch": 8.38, + "learning_rate": 4.5394415455264164e-05, + "loss": 0.0395, + "step": 8823, + "task_loss": 0.0872575044631958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.060022905468940735, + "epoch": 8.38, + "learning_rate": 4.538825095943838e-05, + "loss": 0.0712, + "step": 8824, + "task_loss": 0.17156316339969635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04664577543735504, + "epoch": 8.38, + "learning_rate": 4.538208275998861e-05, + "loss": 0.05, + "step": 8825, + "task_loss": 0.08014590293169022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08388902246952057, + "epoch": 8.38, + "learning_rate": 4.537591085803535e-05, + "loss": 0.0931, + "step": 8826, + "task_loss": 0.17568424344062805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02333504892885685, + "epoch": 8.38, + "learning_rate": 4.5369735254699754e-05, + "loss": 0.0281, + "step": 8827, + "task_loss": 0.07060873508453369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07975588738918304, + "epoch": 8.38, + "learning_rate": 4.536355595110365e-05, + "loss": 0.0783, + "step": 8828, + "task_loss": 0.06561661511659622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05120707303285599, + "epoch": 8.38, + "learning_rate": 4.5357372948369534e-05, + "loss": 0.0468, + "step": 8829, + "task_loss": 0.007422303780913353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08561455458402634, + "epoch": 8.39, + "learning_rate": 4.535118624762057e-05, + "loss": 0.0831, + "step": 8830, + "task_loss": 0.06040613353252411 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02236170694231987, + "epoch": 8.39, + "learning_rate": 4.534499584998062e-05, + "loss": 0.0205, + "step": 8831, + "task_loss": 0.00412105955183506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07289911806583405, + "epoch": 8.39, + "learning_rate": 4.533880175657419e-05, + "loss": 0.0812, + "step": 8832, + "task_loss": 0.15584085881710052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.103610560297966, + "epoch": 8.39, + "learning_rate": 4.533260396852646e-05, + "loss": 0.1056, + "step": 8833, + "task_loss": 0.12309184670448303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1255352646112442, + "epoch": 8.39, + "learning_rate": 4.532640248696331e-05, + "loss": 0.1261, + "step": 8834, + "task_loss": 0.1312541514635086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028518887236714363, + "epoch": 8.39, + "learning_rate": 4.532019731301125e-05, + "loss": 0.028, + "step": 8835, + "task_loss": 0.02283020317554474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04137152433395386, + "epoch": 8.39, + "learning_rate": 4.531398844779749e-05, + "loss": 0.0637, + "step": 8836, + "task_loss": 0.26472553610801697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13369058072566986, + "epoch": 8.39, + "learning_rate": 4.530777589244989e-05, + "loss": 0.142, + "step": 8837, + "task_loss": 0.21714474260807037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07951021194458008, + "epoch": 8.39, + "learning_rate": 4.5301559648096995e-05, + "loss": 0.0786, + "step": 8838, + "task_loss": 0.0705207884311676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08249984681606293, + "epoch": 8.39, + "learning_rate": 4.529533971586802e-05, + "loss": 0.0865, + "step": 8839, + "task_loss": 0.12243272364139557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06132117658853531, + "epoch": 8.4, + "learning_rate": 4.5289116096892834e-05, + "loss": 0.0566, + "step": 8840, + "task_loss": 0.014352064579725266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035899195820093155, + "epoch": 8.4, + "learning_rate": 4.5282888792302e-05, + "loss": 0.0432, + "step": 8841, + "task_loss": 0.10917837917804718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03924690932035446, + "epoch": 8.4, + "learning_rate": 4.527665780322674e-05, + "loss": 0.042, + "step": 8842, + "task_loss": 0.06675869226455688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08691535890102386, + "epoch": 8.4, + "learning_rate": 4.527042313079893e-05, + "loss": 0.0855, + "step": 8843, + "task_loss": 0.07236799597740173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03491726517677307, + "epoch": 8.4, + "learning_rate": 4.526418477615114e-05, + "loss": 0.0329, + "step": 8844, + "task_loss": 0.014603780582547188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026119505986571312, + "epoch": 8.4, + "learning_rate": 4.525794274041658e-05, + "loss": 0.0403, + "step": 8845, + "task_loss": 0.16816593706607819 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025338448584079742, + "epoch": 8.4, + "learning_rate": 4.5251697024729165e-05, + "loss": 0.0233, + "step": 8846, + "task_loss": 0.0048566292971372604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038359981030225754, + "epoch": 8.4, + "learning_rate": 4.524544763022346e-05, + "loss": 0.0368, + "step": 8847, + "task_loss": 0.022422099485993385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.20507895946502686, + "epoch": 8.4, + "learning_rate": 4.523919455803468e-05, + "loss": 0.1902, + "step": 8848, + "task_loss": 0.05593420937657356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02473161742091179, + "epoch": 8.4, + "learning_rate": 4.5232937809298734e-05, + "loss": 0.0301, + "step": 8849, + "task_loss": 0.07794321328401566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01353430561721325, + "epoch": 8.4, + "learning_rate": 4.5226677385152206e-05, + "loss": 0.0133, + "step": 8850, + "task_loss": 0.011057652533054352 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029346950352191925, + "epoch": 8.41, + "learning_rate": 4.522041328673231e-05, + "loss": 0.0364, + "step": 8851, + "task_loss": 0.09976962208747864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09129992872476578, + "epoch": 8.41, + "learning_rate": 4.521414551517695e-05, + "loss": 0.0922, + "step": 8852, + "task_loss": 0.10062223672866821 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0642009973526001, + "epoch": 8.41, + "learning_rate": 4.520787407162471e-05, + "loss": 0.0722, + "step": 8853, + "task_loss": 0.14382028579711914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02546757459640503, + "epoch": 8.41, + "learning_rate": 4.520159895721483e-05, + "loss": 0.0233, + "step": 8854, + "task_loss": 0.0041826870292425156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1266893744468689, + "epoch": 8.41, + "learning_rate": 4.51953201730872e-05, + "loss": 0.1269, + "step": 8855, + "task_loss": 0.12863604724407196 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04428763687610626, + "epoch": 8.41, + "learning_rate": 4.51890377203824e-05, + "loss": 0.0412, + "step": 8856, + "task_loss": 0.013243492692708969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008449184708297253, + "epoch": 8.41, + "learning_rate": 4.518275160024167e-05, + "loss": 0.0082, + "step": 8857, + "task_loss": 0.006423516198992729 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08089472353458405, + "epoch": 8.41, + "learning_rate": 4.5176461813806904e-05, + "loss": 0.0796, + "step": 8858, + "task_loss": 0.0684363842010498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03204871341586113, + "epoch": 8.41, + "learning_rate": 4.5170168362220686e-05, + "loss": 0.0306, + "step": 8859, + "task_loss": 0.017244910821318626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04458446055650711, + "epoch": 8.41, + "learning_rate": 4.516387124662624e-05, + "loss": 0.048, + "step": 8860, + "task_loss": 0.07903842628002167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01822434365749359, + "epoch": 8.42, + "learning_rate": 4.5157570468167464e-05, + "loss": 0.0169, + "step": 8861, + "task_loss": 0.005464507266879082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03550713509321213, + "epoch": 8.42, + "learning_rate": 4.5151266027988946e-05, + "loss": 0.0333, + "step": 8862, + "task_loss": 0.013436004519462585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028216678649187088, + "epoch": 8.42, + "learning_rate": 4.51449579272359e-05, + "loss": 0.0333, + "step": 8863, + "task_loss": 0.07930795848369598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01838553324341774, + "epoch": 8.42, + "learning_rate": 4.5138646167054224e-05, + "loss": 0.017, + "step": 8864, + "task_loss": 0.004559867084026337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08109049499034882, + "epoch": 8.42, + "learning_rate": 4.513233074859049e-05, + "loss": 0.0859, + "step": 8865, + "task_loss": 0.12965066730976105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016361292451620102, + "epoch": 8.42, + "learning_rate": 4.512601167299191e-05, + "loss": 0.0212, + "step": 8866, + "task_loss": 0.06487929821014404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011701793409883976, + "epoch": 8.42, + "learning_rate": 4.511968894140639e-05, + "loss": 0.0264, + "step": 8867, + "task_loss": 0.15838071703910828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06448012590408325, + "epoch": 8.42, + "learning_rate": 4.511336255498247e-05, + "loss": 0.0657, + "step": 8868, + "task_loss": 0.07717397809028625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028120938688516617, + "epoch": 8.42, + "learning_rate": 4.510703251486937e-05, + "loss": 0.026, + "step": 8869, + "task_loss": 0.006957884877920151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10039125382900238, + "epoch": 8.42, + "learning_rate": 4.5100698822216984e-05, + "loss": 0.1005, + "step": 8870, + "task_loss": 0.1014624685049057 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019519591704010963, + "epoch": 8.42, + "learning_rate": 4.509436147817585e-05, + "loss": 0.018, + "step": 8871, + "task_loss": 0.0039330217987298965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.098009392619133, + "epoch": 8.43, + "learning_rate": 4.5088020483897184e-05, + "loss": 0.102, + "step": 8872, + "task_loss": 0.13752353191375732 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09184578061103821, + "epoch": 8.43, + "learning_rate": 4.508167584053285e-05, + "loss": 0.0878, + "step": 8873, + "task_loss": 0.05179011821746826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03356043994426727, + "epoch": 8.43, + "learning_rate": 4.507532754923537e-05, + "loss": 0.0388, + "step": 8874, + "task_loss": 0.0859462320804596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08062370121479034, + "epoch": 8.43, + "learning_rate": 4.506897561115797e-05, + "loss": 0.0896, + "step": 8875, + "task_loss": 0.17005644738674164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0661439374089241, + "epoch": 8.43, + "learning_rate": 4.506262002745449e-05, + "loss": 0.0845, + "step": 8876, + "task_loss": 0.25010010600090027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04141310602426529, + "epoch": 8.43, + "learning_rate": 4.505626079927947e-05, + "loss": 0.0399, + "step": 8877, + "task_loss": 0.025958728045225143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05498037487268448, + "epoch": 8.43, + "learning_rate": 4.504989792778808e-05, + "loss": 0.0538, + "step": 8878, + "task_loss": 0.04316407069563866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05957046523690224, + "epoch": 8.43, + "learning_rate": 4.504353141413616e-05, + "loss": 0.0686, + "step": 8879, + "task_loss": 0.14945709705352783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03215145319700241, + "epoch": 8.43, + "learning_rate": 4.5037161259480246e-05, + "loss": 0.0435, + "step": 8880, + "task_loss": 0.14541016519069672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03889644145965576, + "epoch": 8.43, + "learning_rate": 4.5030787464977476e-05, + "loss": 0.036, + "step": 8881, + "task_loss": 0.009727858006954193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028448857367038727, + "epoch": 8.43, + "learning_rate": 4.50244100317857e-05, + "loss": 0.0262, + "step": 8882, + "task_loss": 0.005950525403022766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05188886076211929, + "epoch": 8.44, + "learning_rate": 4.5018028961063394e-05, + "loss": 0.0695, + "step": 8883, + "task_loss": 0.22816702723503113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0323837585747242, + "epoch": 8.44, + "learning_rate": 4.501164425396973e-05, + "loss": 0.0389, + "step": 8884, + "task_loss": 0.0973554477095604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06097978726029396, + "epoch": 8.44, + "learning_rate": 4.5005255911664507e-05, + "loss": 0.0569, + "step": 8885, + "task_loss": 0.02063034474849701 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021500347182154655, + "epoch": 8.44, + "learning_rate": 4.49988639353082e-05, + "loss": 0.0345, + "step": 8886, + "task_loss": 0.1510607749223709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.053475480526685715, + "epoch": 8.44, + "learning_rate": 4.4992468326061944e-05, + "loss": 0.0527, + "step": 8887, + "task_loss": 0.04602964594960213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026177948340773582, + "epoch": 8.44, + "learning_rate": 4.498606908508754e-05, + "loss": 0.0313, + "step": 8888, + "task_loss": 0.07776033878326416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1273716241121292, + "epoch": 8.44, + "learning_rate": 4.4979666213547414e-05, + "loss": 0.1253, + "step": 8889, + "task_loss": 0.10640902817249298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028834780678153038, + "epoch": 8.44, + "learning_rate": 4.497325971260471e-05, + "loss": 0.0271, + "step": 8890, + "task_loss": 0.011695507913827896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03636486828327179, + "epoch": 8.44, + "learning_rate": 4.496684958342319e-05, + "loss": 0.0339, + "step": 8891, + "task_loss": 0.011735286563634872 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12257404625415802, + "epoch": 8.44, + "learning_rate": 4.4960435827167266e-05, + "loss": 0.123, + "step": 8892, + "task_loss": 0.12667058408260345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027225444093346596, + "epoch": 8.45, + "learning_rate": 4.495401844500205e-05, + "loss": 0.0297, + "step": 8893, + "task_loss": 0.051733896136283875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022315477952361107, + "epoch": 8.45, + "learning_rate": 4.494759743809329e-05, + "loss": 0.025, + "step": 8894, + "task_loss": 0.04938486963510513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05692111700773239, + "epoch": 8.45, + "learning_rate": 4.494117280760739e-05, + "loss": 0.0631, + "step": 8895, + "task_loss": 0.11854865401983261 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017605263739824295, + "epoch": 8.45, + "learning_rate": 4.49347445547114e-05, + "loss": 0.0404, + "step": 8896, + "task_loss": 0.24594470858573914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13257010281085968, + "epoch": 8.45, + "learning_rate": 4.4928312680573064e-05, + "loss": 0.1377, + "step": 8897, + "task_loss": 0.18380433320999146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0691106989979744, + "epoch": 8.45, + "learning_rate": 4.492187718636075e-05, + "loss": 0.0788, + "step": 8898, + "task_loss": 0.16619594395160675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03699222952127457, + "epoch": 8.45, + "learning_rate": 4.49154380732435e-05, + "loss": 0.048, + "step": 8899, + "task_loss": 0.14703547954559326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11394920945167542, + "epoch": 8.45, + "learning_rate": 4.490899534239101e-05, + "loss": 0.1091, + "step": 8900, + "task_loss": 0.06529615074396133 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05552458390593529, + "epoch": 8.45, + "learning_rate": 4.490254899497364e-05, + "loss": 0.0599, + "step": 8901, + "task_loss": 0.0996512770652771 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0950566753745079, + "epoch": 8.45, + "learning_rate": 4.4896099032162386e-05, + "loss": 0.0994, + "step": 8902, + "task_loss": 0.13881003856658936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029427465051412582, + "epoch": 8.45, + "learning_rate": 4.488964545512892e-05, + "loss": 0.0426, + "step": 8903, + "task_loss": 0.16067925095558167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04379139095544815, + "epoch": 8.46, + "learning_rate": 4.488318826504557e-05, + "loss": 0.0431, + "step": 8904, + "task_loss": 0.03657126426696777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.053460512310266495, + "epoch": 8.46, + "learning_rate": 4.4876727463085324e-05, + "loss": 0.0526, + "step": 8905, + "task_loss": 0.04512707516551018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036254942417144775, + "epoch": 8.46, + "learning_rate": 4.487026305042179e-05, + "loss": 0.0371, + "step": 8906, + "task_loss": 0.045052558183670044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015025094151496887, + "epoch": 8.46, + "learning_rate": 4.4863795028229286e-05, + "loss": 0.0215, + "step": 8907, + "task_loss": 0.07951157540082932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01650727353990078, + "epoch": 8.46, + "learning_rate": 4.4857323397682746e-05, + "loss": 0.0306, + "step": 8908, + "task_loss": 0.15717414021492004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06591572612524033, + "epoch": 8.46, + "learning_rate": 4.485084815995778e-05, + "loss": 0.064, + "step": 8909, + "task_loss": 0.04637327417731285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09855660796165466, + "epoch": 8.46, + "learning_rate": 4.484436931623064e-05, + "loss": 0.0949, + "step": 8910, + "task_loss": 0.06240401789546013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027789302170276642, + "epoch": 8.46, + "learning_rate": 4.4837886867678245e-05, + "loss": 0.0327, + "step": 8911, + "task_loss": 0.07687053829431534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07295016199350357, + "epoch": 8.46, + "learning_rate": 4.4831400815478164e-05, + "loss": 0.0882, + "step": 8912, + "task_loss": 0.2257649004459381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0313107855618, + "epoch": 8.46, + "learning_rate": 4.482491116080861e-05, + "loss": 0.0417, + "step": 8913, + "task_loss": 0.1352921426296234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020768383517861366, + "epoch": 8.47, + "learning_rate": 4.4818417904848466e-05, + "loss": 0.036, + "step": 8914, + "task_loss": 0.17357057332992554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11295350641012192, + "epoch": 8.47, + "learning_rate": 4.481192104877726e-05, + "loss": 0.1166, + "step": 8915, + "task_loss": 0.14907796680927277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021822160109877586, + "epoch": 8.47, + "learning_rate": 4.480542059377519e-05, + "loss": 0.0222, + "step": 8916, + "task_loss": 0.025608519092202187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02147882990539074, + "epoch": 8.47, + "learning_rate": 4.479891654102307e-05, + "loss": 0.02, + "step": 8917, + "task_loss": 0.006244117394089699 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05350108444690704, + "epoch": 8.47, + "learning_rate": 4.4792408891702426e-05, + "loss": 0.0564, + "step": 8918, + "task_loss": 0.08280838280916214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09106861054897308, + "epoch": 8.47, + "learning_rate": 4.4785897646995376e-05, + "loss": 0.0881, + "step": 8919, + "task_loss": 0.06174633651971817 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028143450617790222, + "epoch": 8.47, + "learning_rate": 4.477938280808473e-05, + "loss": 0.0261, + "step": 8920, + "task_loss": 0.007531605660915375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028097132220864296, + "epoch": 8.47, + "learning_rate": 4.4772864376153936e-05, + "loss": 0.0306, + "step": 8921, + "task_loss": 0.05265646427869797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029321586713194847, + "epoch": 8.47, + "learning_rate": 4.4766342352387106e-05, + "loss": 0.027, + "step": 8922, + "task_loss": 0.006089037284255028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06918927282094955, + "epoch": 8.47, + "learning_rate": 4.475981673796899e-05, + "loss": 0.0718, + "step": 8923, + "task_loss": 0.09525958448648453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07651045173406601, + "epoch": 8.47, + "learning_rate": 4.475328753408499e-05, + "loss": 0.0909, + "step": 8924, + "task_loss": 0.22058546543121338 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12579286098480225, + "epoch": 8.48, + "learning_rate": 4.474675474192119e-05, + "loss": 0.1404, + "step": 8925, + "task_loss": 0.2715792953968048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0216844342648983, + "epoch": 8.48, + "learning_rate": 4.4740218362664276e-05, + "loss": 0.0204, + "step": 8926, + "task_loss": 0.008529262617230415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07038342207670212, + "epoch": 8.48, + "learning_rate": 4.473367839750165e-05, + "loss": 0.0718, + "step": 8927, + "task_loss": 0.0848298892378807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059562116861343384, + "epoch": 8.48, + "learning_rate": 4.4727134847621276e-05, + "loss": 0.0718, + "step": 8928, + "task_loss": 0.18145687878131866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07572756707668304, + "epoch": 8.48, + "learning_rate": 4.4720587714211863e-05, + "loss": 0.0747, + "step": 8929, + "task_loss": 0.06551861763000488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06999503076076508, + "epoch": 8.48, + "learning_rate": 4.471403699846272e-05, + "loss": 0.0659, + "step": 8930, + "task_loss": 0.0290969330817461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028632836416363716, + "epoch": 8.48, + "learning_rate": 4.470748270156381e-05, + "loss": 0.0337, + "step": 8931, + "task_loss": 0.07966382801532745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10344910621643066, + "epoch": 8.48, + "learning_rate": 4.4700924824705745e-05, + "loss": 0.1, + "step": 8932, + "task_loss": 0.06940016150474548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09662455320358276, + "epoch": 8.48, + "learning_rate": 4.469436336907982e-05, + "loss": 0.0968, + "step": 8933, + "task_loss": 0.09881898760795593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08603687584400177, + "epoch": 8.48, + "learning_rate": 4.4687798335877936e-05, + "loss": 0.0844, + "step": 8934, + "task_loss": 0.06977303326129913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05292690172791481, + "epoch": 8.49, + "learning_rate": 4.4681229726292664e-05, + "loss": 0.0519, + "step": 8935, + "task_loss": 0.04311305657029152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019087469205260277, + "epoch": 8.49, + "learning_rate": 4.4674657541517227e-05, + "loss": 0.0191, + "step": 8936, + "task_loss": 0.018771233037114143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01726618967950344, + "epoch": 8.49, + "learning_rate": 4.466808178274549e-05, + "loss": 0.0225, + "step": 8937, + "task_loss": 0.06974489986896515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07852751016616821, + "epoch": 8.49, + "learning_rate": 4.4661502451171975e-05, + "loss": 0.0891, + "step": 8938, + "task_loss": 0.18472985923290253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015991097316145897, + "epoch": 8.49, + "learning_rate": 4.465491954799186e-05, + "loss": 0.0148, + "step": 8939, + "task_loss": 0.004459971562027931 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07318315654993057, + "epoch": 8.49, + "learning_rate": 4.4648333074400936e-05, + "loss": 0.0713, + "step": 8940, + "task_loss": 0.054741598665714264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04243193194270134, + "epoch": 8.49, + "learning_rate": 4.464174303159569e-05, + "loss": 0.0503, + "step": 8941, + "task_loss": 0.12083330750465393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.18897034227848053, + "epoch": 8.49, + "learning_rate": 4.463514942077323e-05, + "loss": 0.1905, + "step": 8942, + "task_loss": 0.20461271703243256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09098904579877853, + "epoch": 8.49, + "learning_rate": 4.4628552243131304e-05, + "loss": 0.0979, + "step": 8943, + "task_loss": 0.16002698242664337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021895185112953186, + "epoch": 8.49, + "learning_rate": 4.462195149986833e-05, + "loss": 0.0259, + "step": 8944, + "task_loss": 0.061538372188806534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05251630023121834, + "epoch": 8.49, + "learning_rate": 4.4615347192183375e-05, + "loss": 0.0511, + "step": 8945, + "task_loss": 0.038724854588508606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0883500799536705, + "epoch": 8.5, + "learning_rate": 4.4608739321276126e-05, + "loss": 0.0855, + "step": 8946, + "task_loss": 0.060159243643283844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0961025282740593, + "epoch": 8.5, + "learning_rate": 4.4602127888346944e-05, + "loss": 0.0918, + "step": 8947, + "task_loss": 0.05281839519739151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07368957996368408, + "epoch": 8.5, + "learning_rate": 4.459551289459684e-05, + "loss": 0.0714, + "step": 8948, + "task_loss": 0.05031800642609596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06434916704893112, + "epoch": 8.5, + "learning_rate": 4.4588894341227426e-05, + "loss": 0.0588, + "step": 8949, + "task_loss": 0.00913977436721325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04223328083753586, + "epoch": 8.5, + "learning_rate": 4.4582272229441024e-05, + "loss": 0.0406, + "step": 8950, + "task_loss": 0.02632623352110386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13263371586799622, + "epoch": 8.5, + "learning_rate": 4.457564656044056e-05, + "loss": 0.1389, + "step": 8951, + "task_loss": 0.1952933669090271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019756224006414413, + "epoch": 8.5, + "learning_rate": 4.456901733542962e-05, + "loss": 0.0185, + "step": 8952, + "task_loss": 0.007240481674671173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1520935297012329, + "epoch": 8.5, + "learning_rate": 4.4562384555612436e-05, + "loss": 0.1599, + "step": 8953, + "task_loss": 0.2299913763999939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10879489779472351, + "epoch": 8.5, + "learning_rate": 4.455574822219388e-05, + "loss": 0.1219, + "step": 8954, + "task_loss": 0.23940874636173248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0421454980969429, + "epoch": 8.5, + "learning_rate": 4.454910833637949e-05, + "loss": 0.0453, + "step": 8955, + "task_loss": 0.07326261699199677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1279737502336502, + "epoch": 8.51, + "learning_rate": 4.454246489937541e-05, + "loss": 0.1322, + "step": 8956, + "task_loss": 0.1699049174785614 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1273374855518341, + "epoch": 8.51, + "learning_rate": 4.4535817912388466e-05, + "loss": 0.1269, + "step": 8957, + "task_loss": 0.12270753085613251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03381875157356262, + "epoch": 8.51, + "learning_rate": 4.4529167376626116e-05, + "loss": 0.061, + "step": 8958, + "task_loss": 0.3059147894382477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035635169595479965, + "epoch": 8.51, + "learning_rate": 4.4522513293296456e-05, + "loss": 0.0395, + "step": 8959, + "task_loss": 0.07383869588375092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08485317975282669, + "epoch": 8.51, + "learning_rate": 4.451585566360823e-05, + "loss": 0.0817, + "step": 8960, + "task_loss": 0.053764086216688156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041285235434770584, + "epoch": 8.51, + "learning_rate": 4.450919448877084e-05, + "loss": 0.0389, + "step": 8961, + "task_loss": 0.017382560297846794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01698409393429756, + "epoch": 8.51, + "learning_rate": 4.4502529769994314e-05, + "loss": 0.0262, + "step": 8962, + "task_loss": 0.10962288081645966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15635554492473602, + "epoch": 8.51, + "learning_rate": 4.449586150848934e-05, + "loss": 0.157, + "step": 8963, + "task_loss": 0.16323071718215942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016910862177610397, + "epoch": 8.51, + "learning_rate": 4.44891897054672e-05, + "loss": 0.0221, + "step": 8964, + "task_loss": 0.06874377280473709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018827490508556366, + "epoch": 8.51, + "learning_rate": 4.4482514362139915e-05, + "loss": 0.0173, + "step": 8965, + "task_loss": 0.003253905102610588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044889599084854126, + "epoch": 8.51, + "learning_rate": 4.4475835479720065e-05, + "loss": 0.0419, + "step": 8966, + "task_loss": 0.014503007754683495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027362622320652008, + "epoch": 8.52, + "learning_rate": 4.4469153059420895e-05, + "loss": 0.0301, + "step": 8967, + "task_loss": 0.054887376725673676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021328095346689224, + "epoch": 8.52, + "learning_rate": 4.4462467102456305e-05, + "loss": 0.0198, + "step": 8968, + "task_loss": 0.005754020065069199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03476056456565857, + "epoch": 8.52, + "learning_rate": 4.4455777610040846e-05, + "loss": 0.0323, + "step": 8969, + "task_loss": 0.010109812021255493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09709491580724716, + "epoch": 8.52, + "learning_rate": 4.444908458338968e-05, + "loss": 0.0945, + "step": 8970, + "task_loss": 0.07128534466028214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037777457386255264, + "epoch": 8.52, + "learning_rate": 4.4442388023718624e-05, + "loss": 0.0562, + "step": 8971, + "task_loss": 0.2219763994216919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05716599524021149, + "epoch": 8.52, + "learning_rate": 4.443568793224415e-05, + "loss": 0.0549, + "step": 8972, + "task_loss": 0.03474997729063034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03178703784942627, + "epoch": 8.52, + "learning_rate": 4.4428984310183364e-05, + "loss": 0.0331, + "step": 8973, + "task_loss": 0.04533195123076439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06467156112194061, + "epoch": 8.52, + "learning_rate": 4.4422277158754005e-05, + "loss": 0.0693, + "step": 8974, + "task_loss": 0.11103697121143341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1044667512178421, + "epoch": 8.52, + "learning_rate": 4.441556647917446e-05, + "loss": 0.106, + "step": 8975, + "task_loss": 0.11955499649047852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06567876040935516, + "epoch": 8.52, + "learning_rate": 4.440885227266376e-05, + "loss": 0.0679, + "step": 8976, + "task_loss": 0.08836454898118973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09253351390361786, + "epoch": 8.53, + "learning_rate": 4.440213454044158e-05, + "loss": 0.0892, + "step": 8977, + "task_loss": 0.05876833572983742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02796206995844841, + "epoch": 8.53, + "learning_rate": 4.43954132837282e-05, + "loss": 0.0414, + "step": 8978, + "task_loss": 0.16239336133003235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07454772293567657, + "epoch": 8.53, + "learning_rate": 4.43886885037446e-05, + "loss": 0.0779, + "step": 8979, + "task_loss": 0.10792988538742065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08012815564870834, + "epoch": 8.53, + "learning_rate": 4.438196020171235e-05, + "loss": 0.0856, + "step": 8980, + "task_loss": 0.13467732071876526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06739650666713715, + "epoch": 8.53, + "learning_rate": 4.437522837885369e-05, + "loss": 0.0743, + "step": 8981, + "task_loss": 0.13656720519065857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039198122918605804, + "epoch": 8.53, + "learning_rate": 4.436849303639148e-05, + "loss": 0.0449, + "step": 8982, + "task_loss": 0.09616616368293762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02301427721977234, + "epoch": 8.53, + "learning_rate": 4.436175417554923e-05, + "loss": 0.0213, + "step": 8983, + "task_loss": 0.005967108532786369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025122862309217453, + "epoch": 8.53, + "learning_rate": 4.4355011797551086e-05, + "loss": 0.0323, + "step": 8984, + "task_loss": 0.09659717977046967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06348798424005508, + "epoch": 8.53, + "learning_rate": 4.4348265903621844e-05, + "loss": 0.0593, + "step": 8985, + "task_loss": 0.02176908776164055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024741999804973602, + "epoch": 8.53, + "learning_rate": 4.4341516494986904e-05, + "loss": 0.029, + "step": 8986, + "task_loss": 0.06750153005123138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022810565307736397, + "epoch": 8.53, + "learning_rate": 4.433476357287235e-05, + "loss": 0.029, + "step": 8987, + "task_loss": 0.08470144867897034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03231749311089516, + "epoch": 8.54, + "learning_rate": 4.432800713850488e-05, + "loss": 0.04, + "step": 8988, + "task_loss": 0.10918974876403809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08315695822238922, + "epoch": 8.54, + "learning_rate": 4.432124719311182e-05, + "loss": 0.0804, + "step": 8989, + "task_loss": 0.056007783859968185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07280036807060242, + "epoch": 8.54, + "learning_rate": 4.431448373792116e-05, + "loss": 0.0704, + "step": 8990, + "task_loss": 0.048897065222263336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023964036256074905, + "epoch": 8.54, + "learning_rate": 4.430771677416151e-05, + "loss": 0.0321, + "step": 8991, + "task_loss": 0.10551959276199341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012696181423962116, + "epoch": 8.54, + "learning_rate": 4.430094630306212e-05, + "loss": 0.0121, + "step": 8992, + "task_loss": 0.006786322221159935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06884432584047318, + "epoch": 8.54, + "learning_rate": 4.429417232585288e-05, + "loss": 0.0656, + "step": 8993, + "task_loss": 0.036455415189266205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03897576034069061, + "epoch": 8.54, + "learning_rate": 4.428739484376431e-05, + "loss": 0.0436, + "step": 8994, + "task_loss": 0.08556883037090302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018597451969981194, + "epoch": 8.54, + "learning_rate": 4.4280613858027584e-05, + "loss": 0.0173, + "step": 8995, + "task_loss": 0.005144355818629265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02616756409406662, + "epoch": 8.54, + "learning_rate": 4.427382936987449e-05, + "loss": 0.0291, + "step": 8996, + "task_loss": 0.05589302256703377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02272038534283638, + "epoch": 8.54, + "learning_rate": 4.426704138053747e-05, + "loss": 0.0209, + "step": 8997, + "task_loss": 0.004764288663864136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017667189240455627, + "epoch": 8.55, + "learning_rate": 4.426024989124959e-05, + "loss": 0.0162, + "step": 8998, + "task_loss": 0.00283648818731308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09155499935150146, + "epoch": 8.55, + "learning_rate": 4.425345490324456e-05, + "loss": 0.0864, + "step": 8999, + "task_loss": 0.04047724977135658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0898907482624054, + "epoch": 8.55, + "learning_rate": 4.424665641775673e-05, + "loss": 0.0954, + "step": 9000, + "task_loss": 0.14472734928131104 + }, + { + "epoch": 8.55, + "eval_accuracy": 0.893348623853211, + "eval_loss": 0.4816896319389343, + "eval_runtime": 18.2944, + "eval_samples_per_second": 47.665, + "eval_steps_per_second": 5.958, + "step": 9000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1226925477385521, + "epoch": 8.55, + "learning_rate": 4.4239854436021056e-05, + "loss": 0.1208, + "step": 9001, + "task_loss": 0.10388394445180893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07912658900022507, + "epoch": 8.55, + "learning_rate": 4.423304895927317e-05, + "loss": 0.0727, + "step": 9002, + "task_loss": 0.014653431251645088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07628723978996277, + "epoch": 8.55, + "learning_rate": 4.4226239988749305e-05, + "loss": 0.0717, + "step": 9003, + "task_loss": 0.030388563871383667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17722564935684204, + "epoch": 8.55, + "learning_rate": 4.4219427525686366e-05, + "loss": 0.1661, + "step": 9004, + "task_loss": 0.06582056730985641 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01308794878423214, + "epoch": 8.55, + "learning_rate": 4.421261157132185e-05, + "loss": 0.0245, + "step": 9005, + "task_loss": 0.1268969178199768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06603103131055832, + "epoch": 8.55, + "learning_rate": 4.4205792126893905e-05, + "loss": 0.0753, + "step": 9006, + "task_loss": 0.15897931158542633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.033943966031074524, + "epoch": 8.55, + "learning_rate": 4.4198969193641324e-05, + "loss": 0.0317, + "step": 9007, + "task_loss": 0.01145954243838787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09488087147474289, + "epoch": 8.55, + "learning_rate": 4.4192142772803535e-05, + "loss": 0.0931, + "step": 9008, + "task_loss": 0.07663992047309875 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04525774344801903, + "epoch": 8.56, + "learning_rate": 4.4185312865620575e-05, + "loss": 0.0469, + "step": 9009, + "task_loss": 0.06158378720283508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022823676466941833, + "epoch": 8.56, + "learning_rate": 4.417847947333314e-05, + "loss": 0.0212, + "step": 9010, + "task_loss": 0.006658636033535004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017958180978894234, + "epoch": 8.56, + "learning_rate": 4.417164259718254e-05, + "loss": 0.0167, + "step": 9011, + "task_loss": 0.005114752799272537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12965308129787445, + "epoch": 8.56, + "learning_rate": 4.416480223841073e-05, + "loss": 0.1233, + "step": 9012, + "task_loss": 0.06576710194349289 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08245688676834106, + "epoch": 8.56, + "learning_rate": 4.4157958398260294e-05, + "loss": 0.0786, + "step": 9013, + "task_loss": 0.04379882290959358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012385990470647812, + "epoch": 8.56, + "learning_rate": 4.415111107797445e-05, + "loss": 0.0117, + "step": 9014, + "task_loss": 0.005246100947260857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08853226900100708, + "epoch": 8.56, + "learning_rate": 4.414426027879705e-05, + "loss": 0.0933, + "step": 9015, + "task_loss": 0.13666236400604248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06031012162566185, + "epoch": 8.56, + "learning_rate": 4.413740600197257e-05, + "loss": 0.0607, + "step": 9016, + "task_loss": 0.06431614607572556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044511131942272186, + "epoch": 8.56, + "learning_rate": 4.413054824874612e-05, + "loss": 0.062, + "step": 9017, + "task_loss": 0.21939462423324585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039623238146305084, + "epoch": 8.56, + "learning_rate": 4.412368702036345e-05, + "loss": 0.0482, + "step": 9018, + "task_loss": 0.12542547285556793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2884577512741089, + "epoch": 8.57, + "learning_rate": 4.4116822318070925e-05, + "loss": 0.2897, + "step": 9019, + "task_loss": 0.3008236885070801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044239118695259094, + "epoch": 8.57, + "learning_rate": 4.4109954143115565e-05, + "loss": 0.0403, + "step": 9020, + "task_loss": 0.004376189783215523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01869271881878376, + "epoch": 8.57, + "learning_rate": 4.4103082496745e-05, + "loss": 0.0232, + "step": 9021, + "task_loss": 0.06380611658096313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0486907958984375, + "epoch": 8.57, + "learning_rate": 4.40962073802075e-05, + "loss": 0.0468, + "step": 9022, + "task_loss": 0.03009847365319729 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01841585896909237, + "epoch": 8.57, + "learning_rate": 4.4089328794751954e-05, + "loss": 0.0171, + "step": 9023, + "task_loss": 0.005129978060722351 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0398845374584198, + "epoch": 8.57, + "learning_rate": 4.4082446741627906e-05, + "loss": 0.0362, + "step": 9024, + "task_loss": 0.003042936325073242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.21794645488262177, + "epoch": 8.57, + "learning_rate": 4.40755612220855e-05, + "loss": 0.2127, + "step": 9025, + "task_loss": 0.165152907371521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07094389200210571, + "epoch": 8.57, + "learning_rate": 4.406867223737553e-05, + "loss": 0.0752, + "step": 9026, + "task_loss": 0.11325624585151672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10760428756475449, + "epoch": 8.57, + "learning_rate": 4.406177978874941e-05, + "loss": 0.1094, + "step": 9027, + "task_loss": 0.1256953477859497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07674893736839294, + "epoch": 8.57, + "learning_rate": 4.405488387745919e-05, + "loss": 0.0779, + "step": 9028, + "task_loss": 0.08849011361598969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05093570053577423, + "epoch": 8.57, + "learning_rate": 4.4047984504757544e-05, + "loss": 0.0768, + "step": 9029, + "task_loss": 0.3099355101585388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02506938949227333, + "epoch": 8.58, + "learning_rate": 4.4041081671897775e-05, + "loss": 0.0271, + "step": 9030, + "task_loss": 0.04571164399385452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018875867128372192, + "epoch": 8.58, + "learning_rate": 4.403417538013382e-05, + "loss": 0.0319, + "step": 9031, + "task_loss": 0.14952301979064941 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02852119505405426, + "epoch": 8.58, + "learning_rate": 4.402726563072024e-05, + "loss": 0.0358, + "step": 9032, + "task_loss": 0.10161833465099335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022057583555579185, + "epoch": 8.58, + "learning_rate": 4.4020352424912226e-05, + "loss": 0.0208, + "step": 9033, + "task_loss": 0.009751364588737488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009804684668779373, + "epoch": 8.58, + "learning_rate": 4.401343576396558e-05, + "loss": 0.0092, + "step": 9034, + "task_loss": 0.0038125887513160706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05322907119989395, + "epoch": 8.58, + "learning_rate": 4.400651564913676e-05, + "loss": 0.053, + "step": 9035, + "task_loss": 0.05118084326386452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01981445588171482, + "epoch": 8.58, + "learning_rate": 4.399959208168284e-05, + "loss": 0.0281, + "step": 9036, + "task_loss": 0.10224159061908722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03673945739865303, + "epoch": 8.58, + "learning_rate": 4.3992665062861514e-05, + "loss": 0.0404, + "step": 9037, + "task_loss": 0.07305504381656647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11148297786712646, + "epoch": 8.58, + "learning_rate": 4.398573459393111e-05, + "loss": 0.1081, + "step": 9038, + "task_loss": 0.07762963324785233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11146679520606995, + "epoch": 8.58, + "learning_rate": 4.3978800676150575e-05, + "loss": 0.108, + "step": 9039, + "task_loss": 0.0765286237001419 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03709033504128456, + "epoch": 8.58, + "learning_rate": 4.39718633107795e-05, + "loss": 0.0438, + "step": 9040, + "task_loss": 0.10385788232088089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038635239005088806, + "epoch": 8.59, + "learning_rate": 4.3964922499078084e-05, + "loss": 0.0372, + "step": 9041, + "task_loss": 0.02382836863398552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051188208162784576, + "epoch": 8.59, + "learning_rate": 4.3957978242307166e-05, + "loss": 0.0502, + "step": 9042, + "task_loss": 0.04129528999328613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028153028339147568, + "epoch": 8.59, + "learning_rate": 4.395103054172819e-05, + "loss": 0.0258, + "step": 9043, + "task_loss": 0.004962790757417679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022645195946097374, + "epoch": 8.59, + "learning_rate": 4.394407939860325e-05, + "loss": 0.0212, + "step": 9044, + "task_loss": 0.008643986657261848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013338697142899036, + "epoch": 8.59, + "learning_rate": 4.3937124814195054e-05, + "loss": 0.0125, + "step": 9045, + "task_loss": 0.004968065768480301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05772208422422409, + "epoch": 8.59, + "learning_rate": 4.393016678976692e-05, + "loss": 0.0609, + "step": 9046, + "task_loss": 0.08959686756134033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03842834755778313, + "epoch": 8.59, + "learning_rate": 4.3923205326582837e-05, + "loss": 0.0449, + "step": 9047, + "task_loss": 0.10268554091453552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03277190029621124, + "epoch": 8.59, + "learning_rate": 4.3916240425907364e-05, + "loss": 0.0405, + "step": 9048, + "task_loss": 0.11014024913311005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020059367641806602, + "epoch": 8.59, + "learning_rate": 4.3909272089005714e-05, + "loss": 0.0194, + "step": 9049, + "task_loss": 0.01322341151535511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2156301587820053, + "epoch": 8.59, + "learning_rate": 4.3902300317143726e-05, + "loss": 0.2129, + "step": 9050, + "task_loss": 0.18819431960582733 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02864878438413143, + "epoch": 8.6, + "learning_rate": 4.389532511158785e-05, + "loss": 0.0264, + "step": 9051, + "task_loss": 0.006369665265083313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04067389667034149, + "epoch": 8.6, + "learning_rate": 4.388834647360516e-05, + "loss": 0.0411, + "step": 9052, + "task_loss": 0.045049235224723816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019816577434539795, + "epoch": 8.6, + "learning_rate": 4.388136440446337e-05, + "loss": 0.0244, + "step": 9053, + "task_loss": 0.06586334109306335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018740810453891754, + "epoch": 8.6, + "learning_rate": 4.387437890543081e-05, + "loss": 0.0203, + "step": 9054, + "task_loss": 0.0338791161775589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03478642553091049, + "epoch": 8.6, + "learning_rate": 4.3867389977776416e-05, + "loss": 0.0369, + "step": 9055, + "task_loss": 0.056206680834293365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06599288433790207, + "epoch": 8.6, + "learning_rate": 4.3860397622769756e-05, + "loss": 0.0698, + "step": 9056, + "task_loss": 0.10445068776607513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06809773296117783, + "epoch": 8.6, + "learning_rate": 4.3853401841681046e-05, + "loss": 0.0656, + "step": 9057, + "task_loss": 0.04264071583747864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0191783644258976, + "epoch": 8.6, + "learning_rate": 4.3846402635781093e-05, + "loss": 0.0178, + "step": 9058, + "task_loss": 0.005517646670341492 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020288830623030663, + "epoch": 8.6, + "learning_rate": 4.3839400006341335e-05, + "loss": 0.0208, + "step": 9059, + "task_loss": 0.025135664269328117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02526734210550785, + "epoch": 8.6, + "learning_rate": 4.383239395463383e-05, + "loss": 0.0395, + "step": 9060, + "task_loss": 0.16764724254608154 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01698536053299904, + "epoch": 8.6, + "learning_rate": 4.382538448193127e-05, + "loss": 0.0157, + "step": 9061, + "task_loss": 0.0042114946991205215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17208489775657654, + "epoch": 8.61, + "learning_rate": 4.381837158950695e-05, + "loss": 0.179, + "step": 9062, + "task_loss": 0.2413436472415924 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03591246157884598, + "epoch": 8.61, + "learning_rate": 4.3811355278634804e-05, + "loss": 0.0522, + "step": 9063, + "task_loss": 0.19905783236026764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03541406989097595, + "epoch": 8.61, + "learning_rate": 4.380433555058937e-05, + "loss": 0.0331, + "step": 9064, + "task_loss": 0.011965034529566765 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1491689682006836, + "epoch": 8.61, + "learning_rate": 4.379731240664583e-05, + "loss": 0.163, + "step": 9065, + "task_loss": 0.2871720790863037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015187927521765232, + "epoch": 8.61, + "learning_rate": 4.379028584807996e-05, + "loss": 0.0142, + "step": 9066, + "task_loss": 0.005585832521319389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031154222786426544, + "epoch": 8.61, + "learning_rate": 4.3783255876168165e-05, + "loss": 0.0329, + "step": 9067, + "task_loss": 0.04878431186079979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12299706786870956, + "epoch": 8.61, + "learning_rate": 4.377622249218748e-05, + "loss": 0.135, + "step": 9068, + "task_loss": 0.24260729551315308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03098423220217228, + "epoch": 8.61, + "learning_rate": 4.376918569741556e-05, + "loss": 0.0388, + "step": 9069, + "task_loss": 0.1092497706413269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02339218370616436, + "epoch": 8.61, + "learning_rate": 4.376214549313066e-05, + "loss": 0.0218, + "step": 9070, + "task_loss": 0.007723584771156311 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0801115334033966, + "epoch": 8.61, + "learning_rate": 4.375510188061167e-05, + "loss": 0.0918, + "step": 9071, + "task_loss": 0.1967240571975708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0836428701877594, + "epoch": 8.62, + "learning_rate": 4.37480548611381e-05, + "loss": 0.0883, + "step": 9072, + "task_loss": 0.1297881305217743 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0943288654088974, + "epoch": 8.62, + "learning_rate": 4.374100443599007e-05, + "loss": 0.0906, + "step": 9073, + "task_loss": 0.0567990243434906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07607637345790863, + "epoch": 8.62, + "learning_rate": 4.3733950606448324e-05, + "loss": 0.0873, + "step": 9074, + "task_loss": 0.18788626790046692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11874774098396301, + "epoch": 8.62, + "learning_rate": 4.3726893373794234e-05, + "loss": 0.1183, + "step": 9075, + "task_loss": 0.11419327557086945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021572759374976158, + "epoch": 8.62, + "learning_rate": 4.3719832739309766e-05, + "loss": 0.0243, + "step": 9076, + "task_loss": 0.04849618673324585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026811379939317703, + "epoch": 8.62, + "learning_rate": 4.371276870427753e-05, + "loss": 0.0278, + "step": 9077, + "task_loss": 0.03716592118144035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008828360587358475, + "epoch": 8.62, + "learning_rate": 4.3705701269980734e-05, + "loss": 0.0082, + "step": 9078, + "task_loss": 0.0030191540718078613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05028454214334488, + "epoch": 8.62, + "learning_rate": 4.369863043770322e-05, + "loss": 0.0475, + "step": 9079, + "task_loss": 0.022545762360095978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047442540526390076, + "epoch": 8.62, + "learning_rate": 4.369155620872943e-05, + "loss": 0.0435, + "step": 9080, + "task_loss": 0.007748594507575035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014092406257987022, + "epoch": 8.62, + "learning_rate": 4.3684478584344433e-05, + "loss": 0.0131, + "step": 9081, + "task_loss": 0.004466302692890167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07513783872127533, + "epoch": 8.62, + "learning_rate": 4.367739756583392e-05, + "loss": 0.0833, + "step": 9082, + "task_loss": 0.1565927267074585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019305624067783356, + "epoch": 8.63, + "learning_rate": 4.367031315448419e-05, + "loss": 0.018, + "step": 9083, + "task_loss": 0.006342671811580658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0106560830026865, + "epoch": 8.63, + "learning_rate": 4.366322535158215e-05, + "loss": 0.014, + "step": 9084, + "task_loss": 0.043936390429735184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06862330436706543, + "epoch": 8.63, + "learning_rate": 4.3656134158415344e-05, + "loss": 0.0769, + "step": 9085, + "task_loss": 0.15090253949165344 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032704662531614304, + "epoch": 8.63, + "learning_rate": 4.364903957627192e-05, + "loss": 0.0301, + "step": 9086, + "task_loss": 0.006765572354197502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022231170907616615, + "epoch": 8.63, + "learning_rate": 4.3641941606440644e-05, + "loss": 0.0292, + "step": 9087, + "task_loss": 0.09189935028553009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15758824348449707, + "epoch": 8.63, + "learning_rate": 4.36348402502109e-05, + "loss": 0.1631, + "step": 9088, + "task_loss": 0.212388277053833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018683135509490967, + "epoch": 8.63, + "learning_rate": 4.3627735508872666e-05, + "loss": 0.0174, + "step": 9089, + "task_loss": 0.0056790560483932495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03444650024175644, + "epoch": 8.63, + "learning_rate": 4.362062738371657e-05, + "loss": 0.0375, + "step": 9090, + "task_loss": 0.06486361473798752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032534707337617874, + "epoch": 8.63, + "learning_rate": 4.361351587603384e-05, + "loss": 0.0591, + "step": 9091, + "task_loss": 0.29826819896698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02512388862669468, + "epoch": 8.63, + "learning_rate": 4.360640098711629e-05, + "loss": 0.0234, + "step": 9092, + "task_loss": 0.007688479498028755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.053498346358537674, + "epoch": 8.64, + "learning_rate": 4.3599282718256406e-05, + "loss": 0.0579, + "step": 9093, + "task_loss": 0.09801323711872101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018389012664556503, + "epoch": 8.64, + "learning_rate": 4.3592161070747233e-05, + "loss": 0.017, + "step": 9094, + "task_loss": 0.004471609368920326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03994788974523544, + "epoch": 8.64, + "learning_rate": 4.358503604588247e-05, + "loss": 0.054, + "step": 9095, + "task_loss": 0.1802259087562561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0729394480586052, + "epoch": 8.64, + "learning_rate": 4.357790764495639e-05, + "loss": 0.0712, + "step": 9096, + "task_loss": 0.055580612272024155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05639396235346794, + "epoch": 8.64, + "learning_rate": 4.357077586926392e-05, + "loss": 0.065, + "step": 9097, + "task_loss": 0.1426202952861786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06405860185623169, + "epoch": 8.64, + "learning_rate": 4.356364072010059e-05, + "loss": 0.0699, + "step": 9098, + "task_loss": 0.1220637708902359 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0317995660007, + "epoch": 8.64, + "learning_rate": 4.3556502198762496e-05, + "loss": 0.0292, + "step": 9099, + "task_loss": 0.006273902952671051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11453507095575333, + "epoch": 8.64, + "learning_rate": 4.354936030654642e-05, + "loss": 0.1091, + "step": 9100, + "task_loss": 0.060647960752248764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020458191633224487, + "epoch": 8.64, + "learning_rate": 4.3542215044749705e-05, + "loss": 0.0258, + "step": 9101, + "task_loss": 0.07395413517951965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04494299367070198, + "epoch": 8.64, + "learning_rate": 4.3535066414670336e-05, + "loss": 0.0484, + "step": 9102, + "task_loss": 0.0797007754445076 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04942406713962555, + "epoch": 8.64, + "learning_rate": 4.352791441760687e-05, + "loss": 0.0658, + "step": 9103, + "task_loss": 0.21325786411762238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07142602652311325, + "epoch": 8.65, + "learning_rate": 4.352075905485854e-05, + "loss": 0.0679, + "step": 9104, + "task_loss": 0.03596463054418564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03519432991743088, + "epoch": 8.65, + "learning_rate": 4.351360032772512e-05, + "loss": 0.0375, + "step": 9105, + "task_loss": 0.05811901390552521 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09797616302967072, + "epoch": 8.65, + "learning_rate": 4.3506438237507033e-05, + "loss": 0.0951, + "step": 9106, + "task_loss": 0.06876572221517563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0369865708053112, + "epoch": 8.65, + "learning_rate": 4.3499272785505316e-05, + "loss": 0.0636, + "step": 9107, + "task_loss": 0.3026273250579834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05089612305164337, + "epoch": 8.65, + "learning_rate": 4.349210397302161e-05, + "loss": 0.0513, + "step": 9108, + "task_loss": 0.05530446022748947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019257325679063797, + "epoch": 8.65, + "learning_rate": 4.348493180135815e-05, + "loss": 0.0181, + "step": 9109, + "task_loss": 0.007393643260002136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01597008667886257, + "epoch": 8.65, + "learning_rate": 4.347775627181782e-05, + "loss": 0.0151, + "step": 9110, + "task_loss": 0.006781516596674919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05022025108337402, + "epoch": 8.65, + "learning_rate": 4.3470577385704056e-05, + "loss": 0.0523, + "step": 9111, + "task_loss": 0.07132594287395477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07263205945491791, + "epoch": 8.65, + "learning_rate": 4.346339514432096e-05, + "loss": 0.0749, + "step": 9112, + "task_loss": 0.09521390497684479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03168325126171112, + "epoch": 8.65, + "learning_rate": 4.345620954897322e-05, + "loss": 0.0433, + "step": 9113, + "task_loss": 0.1476568728685379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06712833046913147, + "epoch": 8.66, + "learning_rate": 4.344902060096612e-05, + "loss": 0.0705, + "step": 9114, + "task_loss": 0.10096494853496552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04527723044157028, + "epoch": 8.66, + "learning_rate": 4.344182830160558e-05, + "loss": 0.0446, + "step": 9115, + "task_loss": 0.03821223974227905 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035746797919273376, + "epoch": 8.66, + "learning_rate": 4.343463265219811e-05, + "loss": 0.0337, + "step": 9116, + "task_loss": 0.014950959011912346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0270676426589489, + "epoch": 8.66, + "learning_rate": 4.342743365405084e-05, + "loss": 0.0262, + "step": 9117, + "task_loss": 0.018197346478700638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05599695444107056, + "epoch": 8.66, + "learning_rate": 4.3420231308471496e-05, + "loss": 0.0586, + "step": 9118, + "task_loss": 0.08177006244659424 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02762037329375744, + "epoch": 8.66, + "learning_rate": 4.3413025616768424e-05, + "loss": 0.0306, + "step": 9119, + "task_loss": 0.057342953979969025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05162864178419113, + "epoch": 8.66, + "learning_rate": 4.340581658025058e-05, + "loss": 0.0553, + "step": 9120, + "task_loss": 0.08785432577133179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029456917196512222, + "epoch": 8.66, + "learning_rate": 4.33986042002275e-05, + "loss": 0.0385, + "step": 9121, + "task_loss": 0.11994407325983047 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0898047387599945, + "epoch": 8.66, + "learning_rate": 4.339138847800936e-05, + "loss": 0.105, + "step": 9122, + "task_loss": 0.24178752303123474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09941445291042328, + "epoch": 8.66, + "learning_rate": 4.3384169414906925e-05, + "loss": 0.107, + "step": 9123, + "task_loss": 0.1748739629983902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01261752750724554, + "epoch": 8.66, + "learning_rate": 4.3376947012231586e-05, + "loss": 0.0193, + "step": 9124, + "task_loss": 0.07987774163484573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02077942155301571, + "epoch": 8.67, + "learning_rate": 4.336972127129532e-05, + "loss": 0.0293, + "step": 9125, + "task_loss": 0.10643217712640762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16735947132110596, + "epoch": 8.67, + "learning_rate": 4.3362492193410705e-05, + "loss": 0.1573, + "step": 9126, + "task_loss": 0.06678342819213867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030655119568109512, + "epoch": 8.67, + "learning_rate": 4.335525977989095e-05, + "loss": 0.0421, + "step": 9127, + "task_loss": 0.14501135051250458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0633954331278801, + "epoch": 8.67, + "learning_rate": 4.334802403204986e-05, + "loss": 0.0723, + "step": 9128, + "task_loss": 0.15235358476638794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021779123693704605, + "epoch": 8.67, + "learning_rate": 4.334078495120184e-05, + "loss": 0.0288, + "step": 9129, + "task_loss": 0.09162303805351257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11775536835193634, + "epoch": 8.67, + "learning_rate": 4.33335425386619e-05, + "loss": 0.1184, + "step": 9130, + "task_loss": 0.1240459531545639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1136549636721611, + "epoch": 8.67, + "learning_rate": 4.332629679574566e-05, + "loss": 0.1147, + "step": 9131, + "task_loss": 0.12449486553668976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0809292122721672, + "epoch": 8.67, + "learning_rate": 4.331904772376935e-05, + "loss": 0.0773, + "step": 9132, + "task_loss": 0.04456901177763939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02589108981192112, + "epoch": 8.67, + "learning_rate": 4.3311795324049795e-05, + "loss": 0.024, + "step": 9133, + "task_loss": 0.006641261279582977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027674835175275803, + "epoch": 8.67, + "learning_rate": 4.3304539597904435e-05, + "loss": 0.0262, + "step": 9134, + "task_loss": 0.013046126812696457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024105960503220558, + "epoch": 8.68, + "learning_rate": 4.3297280546651295e-05, + "loss": 0.0225, + "step": 9135, + "task_loss": 0.007911447435617447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03654143214225769, + "epoch": 8.68, + "learning_rate": 4.329001817160903e-05, + "loss": 0.0334, + "step": 9136, + "task_loss": 0.0051150210201740265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07622884958982468, + "epoch": 8.68, + "learning_rate": 4.3282752474096864e-05, + "loss": 0.0791, + "step": 9137, + "task_loss": 0.10472090542316437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03936569392681122, + "epoch": 8.68, + "learning_rate": 4.327548345543467e-05, + "loss": 0.0384, + "step": 9138, + "task_loss": 0.029306685552001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015473714098334312, + "epoch": 8.68, + "learning_rate": 4.326821111694289e-05, + "loss": 0.0146, + "step": 9139, + "task_loss": 0.00628245621919632 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025059562176465988, + "epoch": 8.68, + "learning_rate": 4.3260935459942584e-05, + "loss": 0.0311, + "step": 9140, + "task_loss": 0.08575528860092163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0825490728020668, + "epoch": 8.68, + "learning_rate": 4.32536564857554e-05, + "loss": 0.0862, + "step": 9141, + "task_loss": 0.11920452117919922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03180139511823654, + "epoch": 8.68, + "learning_rate": 4.3246374195703604e-05, + "loss": 0.0335, + "step": 9142, + "task_loss": 0.04881680756807327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06249899044632912, + "epoch": 8.68, + "learning_rate": 4.3239088591110065e-05, + "loss": 0.0643, + "step": 9143, + "task_loss": 0.08072178065776825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06553536653518677, + "epoch": 8.68, + "learning_rate": 4.323179967329824e-05, + "loss": 0.0638, + "step": 9144, + "task_loss": 0.048590727150440216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01741437427699566, + "epoch": 8.68, + "learning_rate": 4.3224507443592196e-05, + "loss": 0.0162, + "step": 9145, + "task_loss": 0.005251972004771233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15998907387256622, + "epoch": 8.69, + "learning_rate": 4.321721190331661e-05, + "loss": 0.1598, + "step": 9146, + "task_loss": 0.15769526362419128 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041096534579992294, + "epoch": 8.69, + "learning_rate": 4.3209913053796746e-05, + "loss": 0.0477, + "step": 9147, + "task_loss": 0.10717198252677917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06216401606798172, + "epoch": 8.69, + "learning_rate": 4.3202610896358474e-05, + "loss": 0.0581, + "step": 9148, + "task_loss": 0.02124890312552452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02210739627480507, + "epoch": 8.69, + "learning_rate": 4.319530543232827e-05, + "loss": 0.0205, + "step": 9149, + "task_loss": 0.006114525720477104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.049875520169734955, + "epoch": 8.69, + "learning_rate": 4.31879966630332e-05, + "loss": 0.0462, + "step": 9150, + "task_loss": 0.012633267790079117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0812768042087555, + "epoch": 8.69, + "learning_rate": 4.318068458980095e-05, + "loss": 0.089, + "step": 9151, + "task_loss": 0.15828600525856018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03615918010473251, + "epoch": 8.69, + "learning_rate": 4.317336921395978e-05, + "loss": 0.0358, + "step": 9152, + "task_loss": 0.03290475159883499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01615232229232788, + "epoch": 8.69, + "learning_rate": 4.316605053683856e-05, + "loss": 0.0159, + "step": 9153, + "task_loss": 0.013984838500618935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12520606815814972, + "epoch": 8.69, + "learning_rate": 4.3158728559766786e-05, + "loss": 0.1234, + "step": 9154, + "task_loss": 0.10756815969944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021900830790400505, + "epoch": 8.69, + "learning_rate": 4.315140328407451e-05, + "loss": 0.0242, + "step": 9155, + "task_loss": 0.044664304703474045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0630156397819519, + "epoch": 8.7, + "learning_rate": 4.314407471109241e-05, + "loss": 0.0814, + "step": 9156, + "task_loss": 0.2466500848531723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038144879043102264, + "epoch": 8.7, + "learning_rate": 4.313674284215176e-05, + "loss": 0.0364, + "step": 9157, + "task_loss": 0.020709164440631866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09585539251565933, + "epoch": 8.7, + "learning_rate": 4.312940767858441e-05, + "loss": 0.0947, + "step": 9158, + "task_loss": 0.08445055782794952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021123457700014114, + "epoch": 8.7, + "learning_rate": 4.312206922172286e-05, + "loss": 0.0196, + "step": 9159, + "task_loss": 0.005899334326386452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026478923857212067, + "epoch": 8.7, + "learning_rate": 4.311472747290015e-05, + "loss": 0.0246, + "step": 9160, + "task_loss": 0.007890569046139717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.048453450202941895, + "epoch": 8.7, + "learning_rate": 4.310738243344996e-05, + "loss": 0.0517, + "step": 9161, + "task_loss": 0.0805249959230423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.040751051157712936, + "epoch": 8.7, + "learning_rate": 4.310003410470653e-05, + "loss": 0.0386, + "step": 9162, + "task_loss": 0.018803205341100693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04210975766181946, + "epoch": 8.7, + "learning_rate": 4.309268248800476e-05, + "loss": 0.0473, + "step": 9163, + "task_loss": 0.09379490464925766 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0314488485455513, + "epoch": 8.7, + "learning_rate": 4.3085327584680056e-05, + "loss": 0.0393, + "step": 9164, + "task_loss": 0.10950774699449539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023545613512396812, + "epoch": 8.7, + "learning_rate": 4.3077969396068505e-05, + "loss": 0.0339, + "step": 9165, + "task_loss": 0.12670263648033142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026402778923511505, + "epoch": 8.7, + "learning_rate": 4.307060792350675e-05, + "loss": 0.0296, + "step": 9166, + "task_loss": 0.057920970022678375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022520121186971664, + "epoch": 8.71, + "learning_rate": 4.306324316833203e-05, + "loss": 0.0211, + "step": 9167, + "task_loss": 0.008033214136958122 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02548299916088581, + "epoch": 8.71, + "learning_rate": 4.3055875131882204e-05, + "loss": 0.0301, + "step": 9168, + "task_loss": 0.071867436170578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11392084509134293, + "epoch": 8.71, + "learning_rate": 4.30485038154957e-05, + "loss": 0.1113, + "step": 9169, + "task_loss": 0.08807753771543503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013116149231791496, + "epoch": 8.71, + "learning_rate": 4.304112922051155e-05, + "loss": 0.0123, + "step": 9170, + "task_loss": 0.005404811352491379 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09974595159292221, + "epoch": 8.71, + "learning_rate": 4.30337513482694e-05, + "loss": 0.0927, + "step": 9171, + "task_loss": 0.028922712430357933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017070695757865906, + "epoch": 8.71, + "learning_rate": 4.3026370200109463e-05, + "loss": 0.0241, + "step": 9172, + "task_loss": 0.08706965297460556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04366880655288696, + "epoch": 8.71, + "learning_rate": 4.301898577737255e-05, + "loss": 0.0453, + "step": 9173, + "task_loss": 0.06001284718513489 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021489912644028664, + "epoch": 8.71, + "learning_rate": 4.3011598081400105e-05, + "loss": 0.0281, + "step": 9174, + "task_loss": 0.08786029368638992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02259124629199505, + "epoch": 8.71, + "learning_rate": 4.3004207113534124e-05, + "loss": 0.0246, + "step": 9175, + "task_loss": 0.04274073615670204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07785982638597488, + "epoch": 8.71, + "learning_rate": 4.2996812875117206e-05, + "loss": 0.0815, + "step": 9176, + "task_loss": 0.114007368683815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02123086154460907, + "epoch": 8.72, + "learning_rate": 4.2989415367492556e-05, + "loss": 0.02, + "step": 9177, + "task_loss": 0.008761711418628693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020061947405338287, + "epoch": 8.72, + "learning_rate": 4.298201459200397e-05, + "loss": 0.0188, + "step": 9178, + "task_loss": 0.007334306836128235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04284045845270157, + "epoch": 8.72, + "learning_rate": 4.2974610549995834e-05, + "loss": 0.041, + "step": 9179, + "task_loss": 0.024930864572525024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05956036597490311, + "epoch": 8.72, + "learning_rate": 4.296720324281311e-05, + "loss": 0.0714, + "step": 9180, + "task_loss": 0.17790013551712036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035681623965501785, + "epoch": 8.72, + "learning_rate": 4.29597926718014e-05, + "loss": 0.0331, + "step": 9181, + "task_loss": 0.009932447224855423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03028404340147972, + "epoch": 8.72, + "learning_rate": 4.295237883830685e-05, + "loss": 0.0336, + "step": 9182, + "task_loss": 0.06349918246269226 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.056420549750328064, + "epoch": 8.72, + "learning_rate": 4.294496174367623e-05, + "loss": 0.0584, + "step": 9183, + "task_loss": 0.0761711597442627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08292526006698608, + "epoch": 8.72, + "learning_rate": 4.2937541389256877e-05, + "loss": 0.0884, + "step": 9184, + "task_loss": 0.13766731321811676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013823905028402805, + "epoch": 8.72, + "learning_rate": 4.293011777639675e-05, + "loss": 0.0128, + "step": 9185, + "task_loss": 0.0035786759108304977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04730577394366264, + "epoch": 8.72, + "learning_rate": 4.2922690906444374e-05, + "loss": 0.0534, + "step": 9186, + "task_loss": 0.1083056777715683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07880745083093643, + "epoch": 8.72, + "learning_rate": 4.291526078074888e-05, + "loss": 0.0767, + "step": 9187, + "task_loss": 0.05808747559785843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027023762464523315, + "epoch": 8.73, + "learning_rate": 4.290782740065997e-05, + "loss": 0.0305, + "step": 9188, + "task_loss": 0.061429526656866074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06073024123907089, + "epoch": 8.73, + "learning_rate": 4.290039076752799e-05, + "loss": 0.0719, + "step": 9189, + "task_loss": 0.172482430934906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015384482219815254, + "epoch": 8.73, + "learning_rate": 4.28929508827038e-05, + "loss": 0.0144, + "step": 9190, + "task_loss": 0.00527518056333065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01384667120873928, + "epoch": 8.73, + "learning_rate": 4.288550774753892e-05, + "loss": 0.0179, + "step": 9191, + "task_loss": 0.054292093962430954 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03736741095781326, + "epoch": 8.73, + "learning_rate": 4.2878061363385414e-05, + "loss": 0.0363, + "step": 9192, + "task_loss": 0.02659483253955841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025986768305301666, + "epoch": 8.73, + "learning_rate": 4.287061173159597e-05, + "loss": 0.0431, + "step": 9193, + "task_loss": 0.19738051295280457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02209838107228279, + "epoch": 8.73, + "learning_rate": 4.286315885352382e-05, + "loss": 0.03, + "step": 9194, + "task_loss": 0.10124228149652481 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024954846128821373, + "epoch": 8.73, + "learning_rate": 4.285570273052285e-05, + "loss": 0.0387, + "step": 9195, + "task_loss": 0.1625334769487381 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025567319244146347, + "epoch": 8.73, + "learning_rate": 4.2848243363947484e-05, + "loss": 0.0244, + "step": 9196, + "task_loss": 0.014310002326965332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0735805556178093, + "epoch": 8.73, + "learning_rate": 4.2840780755152746e-05, + "loss": 0.0714, + "step": 9197, + "task_loss": 0.0521991103887558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0836353451013565, + "epoch": 8.74, + "learning_rate": 4.283331490549426e-05, + "loss": 0.0817, + "step": 9198, + "task_loss": 0.06436392664909363 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04380929470062256, + "epoch": 8.74, + "learning_rate": 4.282584581632824e-05, + "loss": 0.0602, + "step": 9199, + "task_loss": 0.20809507369995117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12274999171495438, + "epoch": 8.74, + "learning_rate": 4.281837348901148e-05, + "loss": 0.126, + "step": 9200, + "task_loss": 0.1554453819990158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057108841836452484, + "epoch": 8.74, + "learning_rate": 4.281089792490136e-05, + "loss": 0.0531, + "step": 9201, + "task_loss": 0.016610583290457726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03790339082479477, + "epoch": 8.74, + "learning_rate": 4.280341912535585e-05, + "loss": 0.0477, + "step": 9202, + "task_loss": 0.1361059844493866 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06707513332366943, + "epoch": 8.74, + "learning_rate": 4.2795937091733515e-05, + "loss": 0.076, + "step": 9203, + "task_loss": 0.1563674360513687 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06679324805736542, + "epoch": 8.74, + "learning_rate": 4.27884518253935e-05, + "loss": 0.0657, + "step": 9204, + "task_loss": 0.056202180683612823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04162095487117767, + "epoch": 8.74, + "learning_rate": 4.278096332769555e-05, + "loss": 0.0453, + "step": 9205, + "task_loss": 0.07872645556926727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028269974514842033, + "epoch": 8.74, + "learning_rate": 4.277347159999997e-05, + "loss": 0.0262, + "step": 9206, + "task_loss": 0.00805065967142582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1113031655550003, + "epoch": 8.74, + "learning_rate": 4.276597664366767e-05, + "loss": 0.1132, + "step": 9207, + "task_loss": 0.1301630735397339 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028668878600001335, + "epoch": 8.74, + "learning_rate": 4.2758478460060166e-05, + "loss": 0.0301, + "step": 9208, + "task_loss": 0.04257269576191902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03439326956868172, + "epoch": 8.75, + "learning_rate": 4.275097705053951e-05, + "loss": 0.0324, + "step": 9209, + "task_loss": 0.014087924733757973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02624572440981865, + "epoch": 8.75, + "learning_rate": 4.2743472416468385e-05, + "loss": 0.0285, + "step": 9210, + "task_loss": 0.048966288566589355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013097495771944523, + "epoch": 8.75, + "learning_rate": 4.2735964559210054e-05, + "loss": 0.0122, + "step": 9211, + "task_loss": 0.004471452906727791 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08438113331794739, + "epoch": 8.75, + "learning_rate": 4.272845348012833e-05, + "loss": 0.091, + "step": 9212, + "task_loss": 0.1502004712820053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015331685543060303, + "epoch": 8.75, + "learning_rate": 4.272093918058766e-05, + "loss": 0.0143, + "step": 9213, + "task_loss": 0.004537465050816536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019342856481671333, + "epoch": 8.75, + "learning_rate": 4.271342166195304e-05, + "loss": 0.0346, + "step": 9214, + "task_loss": 0.17235919833183289 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023674393072724342, + "epoch": 8.75, + "learning_rate": 4.2705900925590056e-05, + "loss": 0.0295, + "step": 9215, + "task_loss": 0.08143065869808197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013667159713804722, + "epoch": 8.75, + "learning_rate": 4.269837697286491e-05, + "loss": 0.0127, + "step": 9216, + "task_loss": 0.0043479762971401215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03081374801695347, + "epoch": 8.75, + "learning_rate": 4.269084980514434e-05, + "loss": 0.0283, + "step": 9217, + "task_loss": 0.0054970309138298035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019850486889481544, + "epoch": 8.75, + "learning_rate": 4.268331942379571e-05, + "loss": 0.0197, + "step": 9218, + "task_loss": 0.018215559422969818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.053914956748485565, + "epoch": 8.75, + "learning_rate": 4.267578583018694e-05, + "loss": 0.0562, + "step": 9219, + "task_loss": 0.07634520530700684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0433831587433815, + "epoch": 8.76, + "learning_rate": 4.2668249025686545e-05, + "loss": 0.0427, + "step": 9220, + "task_loss": 0.03700166195631027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09998966753482819, + "epoch": 8.76, + "learning_rate": 4.2660709011663624e-05, + "loss": 0.1053, + "step": 9221, + "task_loss": 0.1534278392791748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17341908812522888, + "epoch": 8.76, + "learning_rate": 4.2653165789487864e-05, + "loss": 0.175, + "step": 9222, + "task_loss": 0.189271941781044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021686669439077377, + "epoch": 8.76, + "learning_rate": 4.2645619360529514e-05, + "loss": 0.0204, + "step": 9223, + "task_loss": 0.009309127926826477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030826609581708908, + "epoch": 8.76, + "learning_rate": 4.2638069726159424e-05, + "loss": 0.0394, + "step": 9224, + "task_loss": 0.11687062680721283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01709115505218506, + "epoch": 8.76, + "learning_rate": 4.263051688774902e-05, + "loss": 0.0219, + "step": 9225, + "task_loss": 0.06518833339214325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03491377830505371, + "epoch": 8.76, + "learning_rate": 4.262296084667032e-05, + "loss": 0.0541, + "step": 9226, + "task_loss": 0.22655193507671356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0234515480697155, + "epoch": 8.76, + "learning_rate": 4.2615401604295905e-05, + "loss": 0.0265, + "step": 9227, + "task_loss": 0.0536535307765007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09627413749694824, + "epoch": 8.76, + "learning_rate": 4.260783916199895e-05, + "loss": 0.0981, + "step": 9228, + "task_loss": 0.11438637971878052 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.065280482172966, + "epoch": 8.76, + "learning_rate": 4.260027352115321e-05, + "loss": 0.0611, + "step": 9229, + "task_loss": 0.02382420189678669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04924977943301201, + "epoch": 8.77, + "learning_rate": 4.2592704683133035e-05, + "loss": 0.0536, + "step": 9230, + "task_loss": 0.09264830499887466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02826646901667118, + "epoch": 8.77, + "learning_rate": 4.258513264931331e-05, + "loss": 0.0412, + "step": 9231, + "task_loss": 0.1579834669828415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018645115196704865, + "epoch": 8.77, + "learning_rate": 4.257755742106956e-05, + "loss": 0.0172, + "step": 9232, + "task_loss": 0.0041794683784246445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012779043056070805, + "epoch": 8.77, + "learning_rate": 4.256997899977784e-05, + "loss": 0.012, + "step": 9233, + "task_loss": 0.004724707454442978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03663068264722824, + "epoch": 8.77, + "learning_rate": 4.2562397386814823e-05, + "loss": 0.042, + "step": 9234, + "task_loss": 0.09023445844650269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018597885966300964, + "epoch": 8.77, + "learning_rate": 4.255481258355773e-05, + "loss": 0.0174, + "step": 9235, + "task_loss": 0.006651686504483223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1508217453956604, + "epoch": 8.77, + "learning_rate": 4.254722459138441e-05, + "loss": 0.1488, + "step": 9236, + "task_loss": 0.1301749348640442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04240924119949341, + "epoch": 8.77, + "learning_rate": 4.253963341167321e-05, + "loss": 0.0484, + "step": 9237, + "task_loss": 0.10239571332931519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034557685256004333, + "epoch": 8.77, + "learning_rate": 4.253203904580314e-05, + "loss": 0.0318, + "step": 9238, + "task_loss": 0.007468333467841148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007893304340541363, + "epoch": 8.77, + "learning_rate": 4.252444149515374e-05, + "loss": 0.0165, + "step": 9239, + "task_loss": 0.09393760561943054 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03578212484717369, + "epoch": 8.77, + "learning_rate": 4.251684076110514e-05, + "loss": 0.0509, + "step": 9240, + "task_loss": 0.1870957762002945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024300862103700638, + "epoch": 8.78, + "learning_rate": 4.250923684503806e-05, + "loss": 0.0296, + "step": 9241, + "task_loss": 0.07725972682237625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14117558300495148, + "epoch": 8.78, + "learning_rate": 4.2501629748333774e-05, + "loss": 0.1339, + "step": 9242, + "task_loss": 0.06846613436937332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10145477950572968, + "epoch": 8.78, + "learning_rate": 4.249401947237417e-05, + "loss": 0.0969, + "step": 9243, + "task_loss": 0.056025002151727676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059924256056547165, + "epoch": 8.78, + "learning_rate": 4.248640601854166e-05, + "loss": 0.0585, + "step": 9244, + "task_loss": 0.045406997203826904 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08220744132995605, + "epoch": 8.78, + "learning_rate": 4.247878938821929e-05, + "loss": 0.0747, + "step": 9245, + "task_loss": 0.007002789527177811 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14508725702762604, + "epoch": 8.78, + "learning_rate": 4.247116958279065e-05, + "loss": 0.1431, + "step": 9246, + "task_loss": 0.1252875030040741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03380056470632553, + "epoch": 8.78, + "learning_rate": 4.246354660363991e-05, + "loss": 0.0311, + "step": 9247, + "task_loss": 0.007123725488781929 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03263848274946213, + "epoch": 8.78, + "learning_rate": 4.245592045215182e-05, + "loss": 0.0461, + "step": 9248, + "task_loss": 0.16699370741844177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04700472578406334, + "epoch": 8.78, + "learning_rate": 4.244829112971172e-05, + "loss": 0.0644, + "step": 9249, + "task_loss": 0.22047923505306244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13564848899841309, + "epoch": 8.78, + "learning_rate": 4.24406586377055e-05, + "loss": 0.1365, + "step": 9250, + "task_loss": 0.14452001452445984 + }, + { + "epoch": 8.78, + "eval_accuracy": 0.8876146788990825, + "eval_loss": 0.4403258264064789, + "eval_runtime": 18.1936, + "eval_samples_per_second": 47.929, + "eval_steps_per_second": 5.991, + "step": 9250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11035147309303284, + "epoch": 8.79, + "learning_rate": 4.2433022977519645e-05, + "loss": 0.1127, + "step": 9251, + "task_loss": 0.13335135579109192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1526888608932495, + "epoch": 8.79, + "learning_rate": 4.2425384150541206e-05, + "loss": 0.148, + "step": 9252, + "task_loss": 0.10532040894031525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07202281802892685, + "epoch": 8.79, + "learning_rate": 4.2417742158157816e-05, + "loss": 0.0773, + "step": 9253, + "task_loss": 0.12501731514930725 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0764976292848587, + "epoch": 8.79, + "learning_rate": 4.2410097001757676e-05, + "loss": 0.0794, + "step": 9254, + "task_loss": 0.10542768239974976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01943717896938324, + "epoch": 8.79, + "learning_rate": 4.2402448682729566e-05, + "loss": 0.0181, + "step": 9255, + "task_loss": 0.005980361253023148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06404136121273041, + "epoch": 8.79, + "learning_rate": 4.2394797202462844e-05, + "loss": 0.0716, + "step": 9256, + "task_loss": 0.13929347693920135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0188496895134449, + "epoch": 8.79, + "learning_rate": 4.238714256234744e-05, + "loss": 0.018, + "step": 9257, + "task_loss": 0.010533113032579422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021757036447525024, + "epoch": 8.79, + "learning_rate": 4.237948476377385e-05, + "loss": 0.0224, + "step": 9258, + "task_loss": 0.028252597898244858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038637999445199966, + "epoch": 8.79, + "learning_rate": 4.237182380813315e-05, + "loss": 0.0356, + "step": 9259, + "task_loss": 0.008690120652318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03595548868179321, + "epoch": 8.79, + "learning_rate": 4.236415969681699e-05, + "loss": 0.0449, + "step": 9260, + "task_loss": 0.12517251074314117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014416642487049103, + "epoch": 8.79, + "learning_rate": 4.23564924312176e-05, + "loss": 0.0177, + "step": 9261, + "task_loss": 0.047183021903038025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08503177762031555, + "epoch": 8.8, + "learning_rate": 4.2348822012727765e-05, + "loss": 0.0902, + "step": 9262, + "task_loss": 0.13704237341880798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.060867466032505035, + "epoch": 8.8, + "learning_rate": 4.234114844274086e-05, + "loss": 0.0737, + "step": 9263, + "task_loss": 0.1892949640750885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04127098247408867, + "epoch": 8.8, + "learning_rate": 4.2333471722650826e-05, + "loss": 0.0375, + "step": 9264, + "task_loss": 0.0030923504382371902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02909199520945549, + "epoch": 8.8, + "learning_rate": 4.232579185385217e-05, + "loss": 0.0267, + "step": 9265, + "task_loss": 0.004700042307376862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13650727272033691, + "epoch": 8.8, + "learning_rate": 4.231810883773999e-05, + "loss": 0.1356, + "step": 9266, + "task_loss": 0.1277703493833542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07909749448299408, + "epoch": 8.8, + "learning_rate": 4.231042267570993e-05, + "loss": 0.0816, + "step": 9267, + "task_loss": 0.10437445342540741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03750952333211899, + "epoch": 8.8, + "learning_rate": 4.230273336915822e-05, + "loss": 0.0398, + "step": 9268, + "task_loss": 0.06028630957007408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02200540341436863, + "epoch": 8.8, + "learning_rate": 4.2295040919481664e-05, + "loss": 0.0203, + "step": 9269, + "task_loss": 0.005379866808652878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1982060968875885, + "epoch": 8.8, + "learning_rate": 4.228734532807763e-05, + "loss": 0.19, + "step": 9270, + "task_loss": 0.11627163738012314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03145551681518555, + "epoch": 8.8, + "learning_rate": 4.2279646596344067e-05, + "loss": 0.0289, + "step": 9271, + "task_loss": 0.006189312785863876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02007415145635605, + "epoch": 8.81, + "learning_rate": 4.227194472567948e-05, + "loss": 0.0271, + "step": 9272, + "task_loss": 0.09050026535987854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013911131769418716, + "epoch": 8.81, + "learning_rate": 4.2264239717482945e-05, + "loss": 0.0129, + "step": 9273, + "task_loss": 0.004193015396595001 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0349629707634449, + "epoch": 8.81, + "learning_rate": 4.225653157315412e-05, + "loss": 0.0387, + "step": 9274, + "task_loss": 0.07233209162950516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11082993447780609, + "epoch": 8.81, + "learning_rate": 4.224882029409323e-05, + "loss": 0.1144, + "step": 9275, + "task_loss": 0.14643532037734985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014289605431258678, + "epoch": 8.81, + "learning_rate": 4.224110588170106e-05, + "loss": 0.0134, + "step": 9276, + "task_loss": 0.005692243576049805 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0677342414855957, + "epoch": 8.81, + "learning_rate": 4.223338833737898e-05, + "loss": 0.0682, + "step": 9277, + "task_loss": 0.07213495671749115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0438992902636528, + "epoch": 8.81, + "learning_rate": 4.22256676625289e-05, + "loss": 0.0473, + "step": 9278, + "task_loss": 0.07815498113632202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13519689440727234, + "epoch": 8.81, + "learning_rate": 4.221794385855334e-05, + "loss": 0.1365, + "step": 9279, + "task_loss": 0.14803387224674225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017305288463830948, + "epoch": 8.81, + "learning_rate": 4.221021692685534e-05, + "loss": 0.0161, + "step": 9280, + "task_loss": 0.005480002611875534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036162495613098145, + "epoch": 8.81, + "learning_rate": 4.220248686883857e-05, + "loss": 0.0348, + "step": 9281, + "task_loss": 0.022239340469241142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02372421696782112, + "epoch": 8.81, + "learning_rate": 4.21947536859072e-05, + "loss": 0.0221, + "step": 9282, + "task_loss": 0.007254859432578087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.040189243853092194, + "epoch": 8.82, + "learning_rate": 4.218701737946601e-05, + "loss": 0.0424, + "step": 9283, + "task_loss": 0.062415711581707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03214951977133751, + "epoch": 8.82, + "learning_rate": 4.217927795092034e-05, + "loss": 0.0379, + "step": 9284, + "task_loss": 0.08962158858776093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041563909500837326, + "epoch": 8.82, + "learning_rate": 4.21715354016761e-05, + "loss": 0.0388, + "step": 9285, + "task_loss": 0.01408584788441658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05513744056224823, + "epoch": 8.82, + "learning_rate": 4.216378973313976e-05, + "loss": 0.0519, + "step": 9286, + "task_loss": 0.02227054536342621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03985002636909485, + "epoch": 8.82, + "learning_rate": 4.215604094671835e-05, + "loss": 0.0513, + "step": 9287, + "task_loss": 0.15437698364257812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.055905863642692566, + "epoch": 8.82, + "learning_rate": 4.214828904381947e-05, + "loss": 0.0522, + "step": 9288, + "task_loss": 0.01845194399356842 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.049979522824287415, + "epoch": 8.82, + "learning_rate": 4.21405340258513e-05, + "loss": 0.054, + "step": 9289, + "task_loss": 0.09055973589420319 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04201385751366615, + "epoch": 8.82, + "learning_rate": 4.213277589422258e-05, + "loss": 0.0532, + "step": 9290, + "task_loss": 0.1543242484331131 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10822063684463501, + "epoch": 8.82, + "learning_rate": 4.21250146503426e-05, + "loss": 0.106, + "step": 9291, + "task_loss": 0.08604384958744049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02375629171729088, + "epoch": 8.82, + "learning_rate": 4.2117250295621235e-05, + "loss": 0.0341, + "step": 9292, + "task_loss": 0.1269487887620926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028724079951643944, + "epoch": 8.83, + "learning_rate": 4.210948283146892e-05, + "loss": 0.027, + "step": 9293, + "task_loss": 0.011627469211816788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023904111236333847, + "epoch": 8.83, + "learning_rate": 4.210171225929664e-05, + "loss": 0.022, + "step": 9294, + "task_loss": 0.004593405872583389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030907439067959785, + "epoch": 8.83, + "learning_rate": 4.209393858051598e-05, + "loss": 0.0352, + "step": 9295, + "task_loss": 0.0739751011133194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04190927371382713, + "epoch": 8.83, + "learning_rate": 4.208616179653903e-05, + "loss": 0.0506, + "step": 9296, + "task_loss": 0.12926626205444336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011166459880769253, + "epoch": 8.83, + "learning_rate": 4.207838190877852e-05, + "loss": 0.0194, + "step": 9297, + "task_loss": 0.09343065321445465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07796520739793777, + "epoch": 8.83, + "learning_rate": 4.2070598918647683e-05, + "loss": 0.0892, + "step": 9298, + "task_loss": 0.19046609103679657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13977394998073578, + "epoch": 8.83, + "learning_rate": 4.206281282756034e-05, + "loss": 0.137, + "step": 9299, + "task_loss": 0.1115388497710228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01926058530807495, + "epoch": 8.83, + "learning_rate": 4.205502363693087e-05, + "loss": 0.0178, + "step": 9300, + "task_loss": 0.004766935482621193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014616726897656918, + "epoch": 8.83, + "learning_rate": 4.204723134817422e-05, + "loss": 0.0137, + "step": 9301, + "task_loss": 0.005341559648513794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031305767595767975, + "epoch": 8.83, + "learning_rate": 4.2039435962705886e-05, + "loss": 0.0336, + "step": 9302, + "task_loss": 0.054229650646448135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06339511275291443, + "epoch": 8.83, + "learning_rate": 4.2031637481941954e-05, + "loss": 0.0737, + "step": 9303, + "task_loss": 0.1668478399515152 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03651703521609306, + "epoch": 8.84, + "learning_rate": 4.202383590729905e-05, + "loss": 0.0422, + "step": 9304, + "task_loss": 0.09357205033302307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03856692090630531, + "epoch": 8.84, + "learning_rate": 4.201603124019436e-05, + "loss": 0.0393, + "step": 9305, + "task_loss": 0.0458366759121418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022785726934671402, + "epoch": 8.84, + "learning_rate": 4.200822348204565e-05, + "loss": 0.0277, + "step": 9306, + "task_loss": 0.07169666886329651 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.054608479142189026, + "epoch": 8.84, + "learning_rate": 4.200041263427123e-05, + "loss": 0.0635, + "step": 9307, + "task_loss": 0.14374345541000366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0937652736902237, + "epoch": 8.84, + "learning_rate": 4.199259869828998e-05, + "loss": 0.0977, + "step": 9308, + "task_loss": 0.13320079445838928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0214654803276062, + "epoch": 8.84, + "learning_rate": 4.1984781675521345e-05, + "loss": 0.0285, + "step": 9309, + "task_loss": 0.09145978093147278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02630656771361828, + "epoch": 8.84, + "learning_rate": 4.1976961567385306e-05, + "loss": 0.0298, + "step": 9310, + "task_loss": 0.06128865107893944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029763180762529373, + "epoch": 8.84, + "learning_rate": 4.1969138375302445e-05, + "loss": 0.0275, + "step": 9311, + "task_loss": 0.007424212992191315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013515127822756767, + "epoch": 8.84, + "learning_rate": 4.1961312100693874e-05, + "loss": 0.0312, + "step": 9312, + "task_loss": 0.19070707261562347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009992139413952827, + "epoch": 8.84, + "learning_rate": 4.1953482744981274e-05, + "loss": 0.0093, + "step": 9313, + "task_loss": 0.0031170137226581573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08150660246610641, + "epoch": 8.85, + "learning_rate": 4.194565030958688e-05, + "loss": 0.0909, + "step": 9314, + "task_loss": 0.17554497718811035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01873527467250824, + "epoch": 8.85, + "learning_rate": 4.19378147959335e-05, + "loss": 0.0211, + "step": 9315, + "task_loss": 0.04287352040410042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16984689235687256, + "epoch": 8.85, + "learning_rate": 4.192997620544449e-05, + "loss": 0.1605, + "step": 9316, + "task_loss": 0.0767621323466301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06193321570754051, + "epoch": 8.85, + "learning_rate": 4.192213453954377e-05, + "loss": 0.0631, + "step": 9317, + "task_loss": 0.07395564019680023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01895410194993019, + "epoch": 8.85, + "learning_rate": 4.19142897996558e-05, + "loss": 0.0228, + "step": 9318, + "task_loss": 0.05743853747844696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09764152765274048, + "epoch": 8.85, + "learning_rate": 4.190644198720563e-05, + "loss": 0.0953, + "step": 9319, + "task_loss": 0.07411730289459229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02355141192674637, + "epoch": 8.85, + "learning_rate": 4.189859110361886e-05, + "loss": 0.0269, + "step": 9320, + "task_loss": 0.05675677955150604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.23243659734725952, + "epoch": 8.85, + "learning_rate": 4.189073715032163e-05, + "loss": 0.2339, + "step": 9321, + "task_loss": 0.24723902344703674 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057536251842975616, + "epoch": 8.85, + "learning_rate": 4.188288012874065e-05, + "loss": 0.0635, + "step": 9322, + "task_loss": 0.11727714538574219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07297425717115402, + "epoch": 8.85, + "learning_rate": 4.187502004030318e-05, + "loss": 0.0841, + "step": 9323, + "task_loss": 0.1841675043106079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.19478213787078857, + "epoch": 8.85, + "learning_rate": 4.186715688643705e-05, + "loss": 0.1956, + "step": 9324, + "task_loss": 0.202731192111969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16516168415546417, + "epoch": 8.86, + "learning_rate": 4.185929066857064e-05, + "loss": 0.1631, + "step": 9325, + "task_loss": 0.14431238174438477 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.14519473910331726, + "epoch": 8.86, + "learning_rate": 4.1851421388132886e-05, + "loss": 0.1572, + "step": 9326, + "task_loss": 0.2655932903289795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04612842947244644, + "epoch": 8.86, + "learning_rate": 4.1843549046553284e-05, + "loss": 0.0491, + "step": 9327, + "task_loss": 0.07564530521631241 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16417290270328522, + "epoch": 8.86, + "learning_rate": 4.183567364526186e-05, + "loss": 0.1742, + "step": 9328, + "task_loss": 0.26465079188346863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02931727096438408, + "epoch": 8.86, + "learning_rate": 4.182779518568926e-05, + "loss": 0.0414, + "step": 9329, + "task_loss": 0.1504097282886505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05426057428121567, + "epoch": 8.86, + "learning_rate": 4.181991366926661e-05, + "loss": 0.0512, + "step": 9330, + "task_loss": 0.02333644963800907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2061796486377716, + "epoch": 8.86, + "learning_rate": 4.181202909742564e-05, + "loss": 0.2011, + "step": 9331, + "task_loss": 0.15504643321037292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01282942108809948, + "epoch": 8.86, + "learning_rate": 4.1804141471598604e-05, + "loss": 0.026, + "step": 9332, + "task_loss": 0.14491677284240723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057288218289613724, + "epoch": 8.86, + "learning_rate": 4.179625079321836e-05, + "loss": 0.0536, + "step": 9333, + "task_loss": 0.020448647439479828 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06341984868049622, + "epoch": 8.86, + "learning_rate": 4.1788357063718254e-05, + "loss": 0.0675, + "step": 9334, + "task_loss": 0.10468359291553497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05787978321313858, + "epoch": 8.87, + "learning_rate": 4.178046028453224e-05, + "loss": 0.0547, + "step": 9335, + "task_loss": 0.026474833488464355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05362321063876152, + "epoch": 8.87, + "learning_rate": 4.1772560457094795e-05, + "loss": 0.0721, + "step": 9336, + "task_loss": 0.23888705670833588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06569310277700424, + "epoch": 8.87, + "learning_rate": 4.1764657582840965e-05, + "loss": 0.07, + "step": 9337, + "task_loss": 0.10902517288923264 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1104736477136612, + "epoch": 8.87, + "learning_rate": 4.175675166320635e-05, + "loss": 0.1168, + "step": 9338, + "task_loss": 0.17340587079524994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014345163479447365, + "epoch": 8.87, + "learning_rate": 4.1748842699627094e-05, + "loss": 0.0135, + "step": 9339, + "task_loss": 0.00562736950814724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03695271164178848, + "epoch": 8.87, + "learning_rate": 4.17409306935399e-05, + "loss": 0.0341, + "step": 9340, + "task_loss": 0.008127160370349884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01702256128191948, + "epoch": 8.87, + "learning_rate": 4.173301564638201e-05, + "loss": 0.0247, + "step": 9341, + "task_loss": 0.09353862702846527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01691245101392269, + "epoch": 8.87, + "learning_rate": 4.1725097559591256e-05, + "loss": 0.0158, + "step": 9342, + "task_loss": 0.005920737981796265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03731689229607582, + "epoch": 8.87, + "learning_rate": 4.1717176434605967e-05, + "loss": 0.0357, + "step": 9343, + "task_loss": 0.021031979471445084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0925697386264801, + "epoch": 8.87, + "learning_rate": 4.170925227286508e-05, + "loss": 0.1213, + "step": 9344, + "task_loss": 0.38006648421287537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00923939235508442, + "epoch": 8.87, + "learning_rate": 4.170132507580803e-05, + "loss": 0.0155, + "step": 9345, + "task_loss": 0.07144822180271149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009474704042077065, + "epoch": 8.88, + "learning_rate": 4.1693394844874856e-05, + "loss": 0.0154, + "step": 9346, + "task_loss": 0.06834915280342102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0885670930147171, + "epoch": 8.88, + "learning_rate": 4.1685461581506115e-05, + "loss": 0.1049, + "step": 9347, + "task_loss": 0.2523471713066101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1884203851222992, + "epoch": 8.88, + "learning_rate": 4.167752528714291e-05, + "loss": 0.1863, + "step": 9348, + "task_loss": 0.16740942001342773 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013086885213851929, + "epoch": 8.88, + "learning_rate": 4.166958596322692e-05, + "loss": 0.0186, + "step": 9349, + "task_loss": 0.06813879311084747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1107698380947113, + "epoch": 8.88, + "learning_rate": 4.1661643611200366e-05, + "loss": 0.1103, + "step": 9350, + "task_loss": 0.10653585195541382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03403695300221443, + "epoch": 8.88, + "learning_rate": 4.1653698232506e-05, + "loss": 0.0334, + "step": 9351, + "task_loss": 0.02738173119723797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016268638893961906, + "epoch": 8.88, + "learning_rate": 4.1645749828587145e-05, + "loss": 0.0161, + "step": 9352, + "task_loss": 0.014955738559365273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039960239082574844, + "epoch": 8.88, + "learning_rate": 4.1637798400887674e-05, + "loss": 0.0423, + "step": 9353, + "task_loss": 0.06289532035589218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021891163662075996, + "epoch": 8.88, + "learning_rate": 4.162984395085198e-05, + "loss": 0.0206, + "step": 9354, + "task_loss": 0.008558722212910652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05967186763882637, + "epoch": 8.88, + "learning_rate": 4.162188647992506e-05, + "loss": 0.0628, + "step": 9355, + "task_loss": 0.09045903384685516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11606243252754211, + "epoch": 8.89, + "learning_rate": 4.161392598955239e-05, + "loss": 0.1182, + "step": 9356, + "task_loss": 0.13704703748226166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05215151980519295, + "epoch": 8.89, + "learning_rate": 4.160596248118007e-05, + "loss": 0.066, + "step": 9357, + "task_loss": 0.19018231332302094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016069339588284492, + "epoch": 8.89, + "learning_rate": 4.159799595625468e-05, + "loss": 0.015, + "step": 9358, + "task_loss": 0.005058174952864647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016761645674705505, + "epoch": 8.89, + "learning_rate": 4.159002641622338e-05, + "loss": 0.0165, + "step": 9359, + "task_loss": 0.013712173327803612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03098924085497856, + "epoch": 8.89, + "learning_rate": 4.1582053862533895e-05, + "loss": 0.0378, + "step": 9360, + "task_loss": 0.0994289442896843 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1274225413799286, + "epoch": 8.89, + "learning_rate": 4.157407829663446e-05, + "loss": 0.1212, + "step": 9361, + "task_loss": 0.06485594809055328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016975758597254753, + "epoch": 8.89, + "learning_rate": 4.1566099719973884e-05, + "loss": 0.0294, + "step": 9362, + "task_loss": 0.1415422558784485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13337287306785583, + "epoch": 8.89, + "learning_rate": 4.1558118134001514e-05, + "loss": 0.1266, + "step": 9363, + "task_loss": 0.06562095880508423 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09345261007547379, + "epoch": 8.89, + "learning_rate": 4.155013354016723e-05, + "loss": 0.0891, + "step": 9364, + "task_loss": 0.04989039897918701 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06697876751422882, + "epoch": 8.89, + "learning_rate": 4.154214593992149e-05, + "loss": 0.0627, + "step": 9365, + "task_loss": 0.02389654330909252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15374194085597992, + "epoch": 8.89, + "learning_rate": 4.1534155334715264e-05, + "loss": 0.1484, + "step": 9366, + "task_loss": 0.1006234660744667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009657006710767746, + "epoch": 8.9, + "learning_rate": 4.15261617260001e-05, + "loss": 0.0192, + "step": 9367, + "task_loss": 0.10532604157924652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0194817166775465, + "epoch": 8.9, + "learning_rate": 4.151816511522807e-05, + "loss": 0.0183, + "step": 9368, + "task_loss": 0.0075190383940935135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018920117989182472, + "epoch": 8.9, + "learning_rate": 4.151016550385179e-05, + "loss": 0.0241, + "step": 9369, + "task_loss": 0.07103104889392853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015537586063146591, + "epoch": 8.9, + "learning_rate": 4.150216289332443e-05, + "loss": 0.0144, + "step": 9370, + "task_loss": 0.004283284768462181 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07015059888362885, + "epoch": 8.9, + "learning_rate": 4.149415728509971e-05, + "loss": 0.0735, + "step": 9371, + "task_loss": 0.10341081768274307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.17750681936740875, + "epoch": 8.9, + "learning_rate": 4.1486148680631875e-05, + "loss": 0.189, + "step": 9372, + "task_loss": 0.29196876287460327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03413275629281998, + "epoch": 8.9, + "learning_rate": 4.147813708137574e-05, + "loss": 0.0359, + "step": 9373, + "task_loss": 0.051412858068943024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044314898550510406, + "epoch": 8.9, + "learning_rate": 4.1470122488786645e-05, + "loss": 0.0408, + "step": 9374, + "task_loss": 0.00903802365064621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03658336400985718, + "epoch": 8.9, + "learning_rate": 4.146210490432048e-05, + "loss": 0.0348, + "step": 9375, + "task_loss": 0.018489893525838852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01839321479201317, + "epoch": 8.9, + "learning_rate": 4.1454084329433674e-05, + "loss": 0.0171, + "step": 9376, + "task_loss": 0.005394909530878067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04376121610403061, + "epoch": 8.91, + "learning_rate": 4.144606076558321e-05, + "loss": 0.0486, + "step": 9377, + "task_loss": 0.09205228835344315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02223261632025242, + "epoch": 8.91, + "learning_rate": 4.14380342142266e-05, + "loss": 0.029, + "step": 9378, + "task_loss": 0.08994803577661514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029528465121984482, + "epoch": 8.91, + "learning_rate": 4.14300046768219e-05, + "loss": 0.0361, + "step": 9379, + "task_loss": 0.09534671902656555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038068436086177826, + "epoch": 8.91, + "learning_rate": 4.1421972154827724e-05, + "loss": 0.0398, + "step": 9380, + "task_loss": 0.055462270975112915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02015954628586769, + "epoch": 8.91, + "learning_rate": 4.141393664970323e-05, + "loss": 0.0265, + "step": 9381, + "task_loss": 0.0838894322514534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02617390640079975, + "epoch": 8.91, + "learning_rate": 4.140589816290808e-05, + "loss": 0.0296, + "step": 9382, + "task_loss": 0.060920245945453644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010754341259598732, + "epoch": 8.91, + "learning_rate": 4.1397856695902535e-05, + "loss": 0.0171, + "step": 9383, + "task_loss": 0.07444259524345398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.052527911961078644, + "epoch": 8.91, + "learning_rate": 4.138981225014733e-05, + "loss": 0.0651, + "step": 9384, + "task_loss": 0.17810016870498657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035542987287044525, + "epoch": 8.91, + "learning_rate": 4.1381764827103806e-05, + "loss": 0.033, + "step": 9385, + "task_loss": 0.010187897831201553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009268272668123245, + "epoch": 8.91, + "learning_rate": 4.13737144282338e-05, + "loss": 0.0144, + "step": 9386, + "task_loss": 0.06069015711545944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01877554878592491, + "epoch": 8.91, + "learning_rate": 4.1365661054999715e-05, + "loss": 0.0176, + "step": 9387, + "task_loss": 0.007025185972452164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.056551381945610046, + "epoch": 8.92, + "learning_rate": 4.1357604708864475e-05, + "loss": 0.067, + "step": 9388, + "task_loss": 0.16118671000003815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12249293923377991, + "epoch": 8.92, + "learning_rate": 4.1349545391291563e-05, + "loss": 0.1238, + "step": 9389, + "task_loss": 0.13515231013298035 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04289904981851578, + "epoch": 8.92, + "learning_rate": 4.1341483103745006e-05, + "loss": 0.0393, + "step": 9390, + "task_loss": 0.006975535303354263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047507885843515396, + "epoch": 8.92, + "learning_rate": 4.133341784768933e-05, + "loss": 0.0585, + "step": 9391, + "task_loss": 0.15751934051513672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03881534934043884, + "epoch": 8.92, + "learning_rate": 4.132534962458962e-05, + "loss": 0.0365, + "step": 9392, + "task_loss": 0.01544598676264286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027173154056072235, + "epoch": 8.92, + "learning_rate": 4.131727843591155e-05, + "loss": 0.025, + "step": 9393, + "task_loss": 0.00519617460668087 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0583278089761734, + "epoch": 8.92, + "learning_rate": 4.130920428312127e-05, + "loss": 0.0568, + "step": 9394, + "task_loss": 0.042698342353105545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1009674221277237, + "epoch": 8.92, + "learning_rate": 4.130112716768548e-05, + "loss": 0.102, + "step": 9395, + "task_loss": 0.11100717633962631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07862529158592224, + "epoch": 8.92, + "learning_rate": 4.129304709107143e-05, + "loss": 0.0764, + "step": 9396, + "task_loss": 0.0560125857591629 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01500142365694046, + "epoch": 8.92, + "learning_rate": 4.128496405474691e-05, + "loss": 0.014, + "step": 9397, + "task_loss": 0.005422631278634071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025359123945236206, + "epoch": 8.92, + "learning_rate": 4.127687806018024e-05, + "loss": 0.0406, + "step": 9398, + "task_loss": 0.1776316612958908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.061490267515182495, + "epoch": 8.93, + "learning_rate": 4.1268789108840275e-05, + "loss": 0.0665, + "step": 9399, + "task_loss": 0.11160209774971008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05927234888076782, + "epoch": 8.93, + "learning_rate": 4.126069720219642e-05, + "loss": 0.0682, + "step": 9400, + "task_loss": 0.14884845912456512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12741845846176147, + "epoch": 8.93, + "learning_rate": 4.125260234171861e-05, + "loss": 0.1217, + "step": 9401, + "task_loss": 0.07042922079563141 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020310278981924057, + "epoch": 8.93, + "learning_rate": 4.12445045288773e-05, + "loss": 0.0244, + "step": 9402, + "task_loss": 0.06154558062553406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.049872465431690216, + "epoch": 8.93, + "learning_rate": 4.123640376514353e-05, + "loss": 0.0495, + "step": 9403, + "task_loss": 0.04623175412416458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018078003078699112, + "epoch": 8.93, + "learning_rate": 4.12283000519888e-05, + "loss": 0.0304, + "step": 9404, + "task_loss": 0.14123806357383728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013958177529275417, + "epoch": 8.93, + "learning_rate": 4.122019339088522e-05, + "loss": 0.0213, + "step": 9405, + "task_loss": 0.08724473416805267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020791584625840187, + "epoch": 8.93, + "learning_rate": 4.121208378330539e-05, + "loss": 0.0194, + "step": 9406, + "task_loss": 0.006587089970707893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03328198194503784, + "epoch": 8.93, + "learning_rate": 4.120397123072246e-05, + "loss": 0.0314, + "step": 9407, + "task_loss": 0.014744751155376434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017374586313962936, + "epoch": 8.93, + "learning_rate": 4.119585573461012e-05, + "loss": 0.016, + "step": 9408, + "task_loss": 0.0033780932426452637 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03562571480870247, + "epoch": 8.94, + "learning_rate": 4.118773729644258e-05, + "loss": 0.0412, + "step": 9409, + "task_loss": 0.09113264083862305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043064821511507034, + "epoch": 8.94, + "learning_rate": 4.11796159176946e-05, + "loss": 0.0501, + "step": 9410, + "task_loss": 0.11316752433776855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08931790292263031, + "epoch": 8.94, + "learning_rate": 4.117149159984147e-05, + "loss": 0.0858, + "step": 9411, + "task_loss": 0.0536871999502182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0797976404428482, + "epoch": 8.94, + "learning_rate": 4.116336434435901e-05, + "loss": 0.0779, + "step": 9412, + "task_loss": 0.060566820204257965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057995155453681946, + "epoch": 8.94, + "learning_rate": 4.115523415272358e-05, + "loss": 0.0555, + "step": 9413, + "task_loss": 0.033352144062519073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03625299036502838, + "epoch": 8.94, + "learning_rate": 4.1147101026412046e-05, + "loss": 0.0428, + "step": 9414, + "task_loss": 0.10129890590906143 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04442795366048813, + "epoch": 8.94, + "learning_rate": 4.1138964966901853e-05, + "loss": 0.0427, + "step": 9415, + "task_loss": 0.027060629799962044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01638917066156864, + "epoch": 8.94, + "learning_rate": 4.113082597567095e-05, + "loss": 0.0203, + "step": 9416, + "task_loss": 0.055116184055805206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041155144572257996, + "epoch": 8.94, + "learning_rate": 4.112268405419782e-05, + "loss": 0.0521, + "step": 9417, + "task_loss": 0.1504831463098526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028086768463253975, + "epoch": 8.94, + "learning_rate": 4.1114539203961476e-05, + "loss": 0.0373, + "step": 9418, + "task_loss": 0.12012705206871033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026071809232234955, + "epoch": 8.94, + "learning_rate": 4.110639142644149e-05, + "loss": 0.0252, + "step": 9419, + "task_loss": 0.016939345747232437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024052530527114868, + "epoch": 8.95, + "learning_rate": 4.109824072311792e-05, + "loss": 0.0352, + "step": 9420, + "task_loss": 0.13542687892913818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.048306502401828766, + "epoch": 8.95, + "learning_rate": 4.10900870954714e-05, + "loss": 0.047, + "step": 9421, + "task_loss": 0.03500698506832123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0342412106692791, + "epoch": 8.95, + "learning_rate": 4.108193054498307e-05, + "loss": 0.0553, + "step": 9422, + "task_loss": 0.24514110386371613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013718321919441223, + "epoch": 8.95, + "learning_rate": 4.10737710731346e-05, + "loss": 0.0214, + "step": 9423, + "task_loss": 0.09085668623447418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013474099338054657, + "epoch": 8.95, + "learning_rate": 4.106560868140821e-05, + "loss": 0.0191, + "step": 9424, + "task_loss": 0.07011019438505173 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0738307386636734, + "epoch": 8.95, + "learning_rate": 4.105744337128662e-05, + "loss": 0.0698, + "step": 9425, + "task_loss": 0.0339224673807621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04669392481446266, + "epoch": 8.95, + "learning_rate": 4.104927514425312e-05, + "loss": 0.0546, + "step": 9426, + "task_loss": 0.12560123205184937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04168114811182022, + "epoch": 8.95, + "learning_rate": 4.104110400179148e-05, + "loss": 0.039, + "step": 9427, + "task_loss": 0.015049118548631668 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020040128380060196, + "epoch": 8.95, + "learning_rate": 4.103292994538605e-05, + "loss": 0.0184, + "step": 9428, + "task_loss": 0.00409487821161747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07207145541906357, + "epoch": 8.95, + "learning_rate": 4.102475297652168e-05, + "loss": 0.0685, + "step": 9429, + "task_loss": 0.036523304879665375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01679806597530842, + "epoch": 8.96, + "learning_rate": 4.1016573096683765e-05, + "loss": 0.0215, + "step": 9430, + "task_loss": 0.0642915591597557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020311608910560608, + "epoch": 8.96, + "learning_rate": 4.10083903073582e-05, + "loss": 0.0239, + "step": 9431, + "task_loss": 0.05604963004589081 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02877676486968994, + "epoch": 8.96, + "learning_rate": 4.1000204610031447e-05, + "loss": 0.0267, + "step": 9432, + "task_loss": 0.007656911388039589 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024545563384890556, + "epoch": 8.96, + "learning_rate": 4.0992016006190456e-05, + "loss": 0.0326, + "step": 9433, + "task_loss": 0.10507547855377197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05073285847902298, + "epoch": 8.96, + "learning_rate": 4.0983824497322755e-05, + "loss": 0.0484, + "step": 9434, + "task_loss": 0.027833662927150726 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027377085760235786, + "epoch": 8.96, + "learning_rate": 4.0975630084916344e-05, + "loss": 0.0253, + "step": 9435, + "task_loss": 0.0062708742916584015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015949569642543793, + "epoch": 8.96, + "learning_rate": 4.096743277045979e-05, + "loss": 0.0235, + "step": 9436, + "task_loss": 0.09101761877536774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06154026836156845, + "epoch": 8.96, + "learning_rate": 4.0959232555442174e-05, + "loss": 0.0634, + "step": 9437, + "task_loss": 0.07965725660324097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039994481950998306, + "epoch": 8.96, + "learning_rate": 4.0951029441353104e-05, + "loss": 0.0373, + "step": 9438, + "task_loss": 0.012804200872778893 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07882995158433914, + "epoch": 8.96, + "learning_rate": 4.094282342968271e-05, + "loss": 0.0807, + "step": 9439, + "task_loss": 0.09789763391017914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09340311586856842, + "epoch": 8.96, + "learning_rate": 4.093461452192167e-05, + "loss": 0.0986, + "step": 9440, + "task_loss": 0.1451651006937027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0292808897793293, + "epoch": 8.97, + "learning_rate": 4.092640271956115e-05, + "loss": 0.0349, + "step": 9441, + "task_loss": 0.08514495939016342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1028076782822609, + "epoch": 8.97, + "learning_rate": 4.091818802409288e-05, + "loss": 0.112, + "step": 9442, + "task_loss": 0.19489334523677826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10867805033922195, + "epoch": 8.97, + "learning_rate": 4.0909970437009096e-05, + "loss": 0.1137, + "step": 9443, + "task_loss": 0.1586572676897049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012916838750243187, + "epoch": 8.97, + "learning_rate": 4.0901749959802546e-05, + "loss": 0.0168, + "step": 9444, + "task_loss": 0.05206574872136116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1345238983631134, + "epoch": 8.97, + "learning_rate": 4.0893526593966535e-05, + "loss": 0.1296, + "step": 9445, + "task_loss": 0.0848974958062172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07488267123699188, + "epoch": 8.97, + "learning_rate": 4.088530034099488e-05, + "loss": 0.0872, + "step": 9446, + "task_loss": 0.1976650059223175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041900884360075, + "epoch": 8.97, + "learning_rate": 4.087707120238191e-05, + "loss": 0.0388, + "step": 9447, + "task_loss": 0.010816860944032669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019380036741495132, + "epoch": 8.97, + "learning_rate": 4.0868839179622495e-05, + "loss": 0.0188, + "step": 9448, + "task_loss": 0.013160983100533485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07051914930343628, + "epoch": 8.97, + "learning_rate": 4.086060427421202e-05, + "loss": 0.0652, + "step": 9449, + "task_loss": 0.01760847680270672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0432322658598423, + "epoch": 8.97, + "learning_rate": 4.0852366487646384e-05, + "loss": 0.0517, + "step": 9450, + "task_loss": 0.12742291390895844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027135606855154037, + "epoch": 8.98, + "learning_rate": 4.084412582142204e-05, + "loss": 0.0364, + "step": 9451, + "task_loss": 0.1198822483420372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15044289827346802, + "epoch": 8.98, + "learning_rate": 4.083588227703593e-05, + "loss": 0.1615, + "step": 9452, + "task_loss": 0.26102936267852783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0592607781291008, + "epoch": 8.98, + "learning_rate": 4.0827635855985534e-05, + "loss": 0.0603, + "step": 9453, + "task_loss": 0.06970083713531494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09907069802284241, + "epoch": 8.98, + "learning_rate": 4.081938655976886e-05, + "loss": 0.1052, + "step": 9454, + "task_loss": 0.16077247262001038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028371935710310936, + "epoch": 8.98, + "learning_rate": 4.0811134389884433e-05, + "loss": 0.0264, + "step": 9455, + "task_loss": 0.008428094908595085 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0345245860517025, + "epoch": 8.98, + "learning_rate": 4.08028793478313e-05, + "loss": 0.0411, + "step": 9456, + "task_loss": 0.09978184103965759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.049868617206811905, + "epoch": 8.98, + "learning_rate": 4.0794621435109015e-05, + "loss": 0.0674, + "step": 9457, + "task_loss": 0.22498998045921326 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02840796858072281, + "epoch": 8.98, + "learning_rate": 4.0786360653217684e-05, + "loss": 0.0265, + "step": 9458, + "task_loss": 0.009237809106707573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03639814257621765, + "epoch": 8.98, + "learning_rate": 4.0778097003657915e-05, + "loss": 0.0344, + "step": 9459, + "task_loss": 0.01689404621720314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020641740411520004, + "epoch": 8.98, + "learning_rate": 4.0769830487930835e-05, + "loss": 0.0251, + "step": 9460, + "task_loss": 0.0654524564743042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05284133553504944, + "epoch": 8.98, + "learning_rate": 4.07615611075381e-05, + "loss": 0.0577, + "step": 9461, + "task_loss": 0.10118487477302551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08153997361660004, + "epoch": 8.99, + "learning_rate": 4.075328886398188e-05, + "loss": 0.0832, + "step": 9462, + "task_loss": 0.09851934015750885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024140600115060806, + "epoch": 8.99, + "learning_rate": 4.074501375876487e-05, + "loss": 0.0225, + "step": 9463, + "task_loss": 0.008064748719334602 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04541175067424774, + "epoch": 8.99, + "learning_rate": 4.073673579339028e-05, + "loss": 0.0472, + "step": 9464, + "task_loss": 0.06317553669214249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10922182351350784, + "epoch": 8.99, + "learning_rate": 4.0728454969361854e-05, + "loss": 0.104, + "step": 9465, + "task_loss": 0.05705301836133003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04132990166544914, + "epoch": 8.99, + "learning_rate": 4.0720171288183815e-05, + "loss": 0.0419, + "step": 9466, + "task_loss": 0.04671701043844223 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0743739902973175, + "epoch": 8.99, + "learning_rate": 4.0711884751360964e-05, + "loss": 0.0726, + "step": 9467, + "task_loss": 0.05616597831249237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05336568504571915, + "epoch": 8.99, + "learning_rate": 4.070359536039858e-05, + "loss": 0.0536, + "step": 9468, + "task_loss": 0.055919770151376724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12312227487564087, + "epoch": 8.99, + "learning_rate": 4.069530311680247e-05, + "loss": 0.1289, + "step": 9469, + "task_loss": 0.180916890501976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0300409235060215, + "epoch": 8.99, + "learning_rate": 4.068700802207895e-05, + "loss": 0.0301, + "step": 9470, + "task_loss": 0.030945636332035065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059253282845020294, + "epoch": 8.99, + "learning_rate": 4.0678710077734885e-05, + "loss": 0.0671, + "step": 9471, + "task_loss": 0.13786810636520386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02918911725282669, + "epoch": 9.0, + "learning_rate": 4.0670409285277614e-05, + "loss": 0.0329, + "step": 9472, + "task_loss": 0.06589201092720032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1608899086713791, + "epoch": 9.0, + "learning_rate": 4.0662105646215034e-05, + "loss": 0.1544, + "step": 9473, + "task_loss": 0.0955737829208374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.045890383422374725, + "epoch": 9.0, + "learning_rate": 4.065379916205554e-05, + "loss": 0.0452, + "step": 9474, + "task_loss": 0.03897964581847191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0502871535718441, + "epoch": 9.0, + "learning_rate": 4.0645489834308024e-05, + "loss": 0.0519, + "step": 9475, + "task_loss": 0.06652585417032242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12040196359157562, + "epoch": 9.0, + "learning_rate": 4.063717766448194e-05, + "loss": 0.1166, + "step": 9476, + "task_loss": 0.08265364170074463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09221913665533066, + "epoch": 9.0, + "learning_rate": 4.062886265408722e-05, + "loss": 0.0868, + "step": 9477, + "task_loss": 0.037736404687166214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08050169050693512, + "epoch": 9.0, + "learning_rate": 4.062054480463433e-05, + "loss": 0.0768, + "step": 9478, + "task_loss": 0.043371133506298065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02520603872835636, + "epoch": 9.0, + "learning_rate": 4.0612224117634245e-05, + "loss": 0.0235, + "step": 9479, + "task_loss": 0.00836150161921978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018169641494750977, + "epoch": 9.0, + "learning_rate": 4.060390059459846e-05, + "loss": 0.0285, + "step": 9480, + "task_loss": 0.1218915730714798 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09185026586055756, + "epoch": 9.0, + "learning_rate": 4.059557423703899e-05, + "loss": 0.0925, + "step": 9481, + "task_loss": 0.09815002977848053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016890790313482285, + "epoch": 9.0, + "learning_rate": 4.058724504646834e-05, + "loss": 0.024, + "step": 9482, + "task_loss": 0.08820458501577377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01857912167906761, + "epoch": 9.01, + "learning_rate": 4.0578913024399564e-05, + "loss": 0.0222, + "step": 9483, + "task_loss": 0.05494304373860359 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0701369047164917, + "epoch": 9.01, + "learning_rate": 4.057057817234621e-05, + "loss": 0.0669, + "step": 9484, + "task_loss": 0.03729560971260071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017086252570152283, + "epoch": 9.01, + "learning_rate": 4.0562240491822334e-05, + "loss": 0.0283, + "step": 9485, + "task_loss": 0.1295742690563202 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0375320240855217, + "epoch": 9.01, + "learning_rate": 4.055389998434253e-05, + "loss": 0.0376, + "step": 9486, + "task_loss": 0.03832355886697769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022690830752253532, + "epoch": 9.01, + "learning_rate": 4.054555665142189e-05, + "loss": 0.0255, + "step": 9487, + "task_loss": 0.05128246918320656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011978648602962494, + "epoch": 9.01, + "learning_rate": 4.053721049457601e-05, + "loss": 0.0112, + "step": 9488, + "task_loss": 0.004542630165815353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014200868085026741, + "epoch": 9.01, + "learning_rate": 4.052886151532101e-05, + "loss": 0.0218, + "step": 9489, + "task_loss": 0.09042149782180786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.049319952726364136, + "epoch": 9.01, + "learning_rate": 4.0520509715173544e-05, + "loss": 0.046, + "step": 9490, + "task_loss": 0.016364455223083496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024487420916557312, + "epoch": 9.01, + "learning_rate": 4.051215509565073e-05, + "loss": 0.0227, + "step": 9491, + "task_loss": 0.006308834999799728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018296554684638977, + "epoch": 9.01, + "learning_rate": 4.050379765827024e-05, + "loss": 0.0169, + "step": 9492, + "task_loss": 0.003857152536511421 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04161164537072182, + "epoch": 9.02, + "learning_rate": 4.0495437404550233e-05, + "loss": 0.0451, + "step": 9493, + "task_loss": 0.07690572738647461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022137103602290154, + "epoch": 9.02, + "learning_rate": 4.04870743360094e-05, + "loss": 0.0273, + "step": 9494, + "task_loss": 0.07371386885643005 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01800430938601494, + "epoch": 9.02, + "learning_rate": 4.047870845416693e-05, + "loss": 0.017, + "step": 9495, + "task_loss": 0.00811527669429779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024769477546215057, + "epoch": 9.02, + "learning_rate": 4.0470339760542506e-05, + "loss": 0.0336, + "step": 9496, + "task_loss": 0.11265938729047775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05644622817635536, + "epoch": 9.02, + "learning_rate": 4.0461968256656376e-05, + "loss": 0.055, + "step": 9497, + "task_loss": 0.042227305471897125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010226700454950333, + "epoch": 9.02, + "learning_rate": 4.045359394402925e-05, + "loss": 0.0097, + "step": 9498, + "task_loss": 0.005248824134469032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020847581326961517, + "epoch": 9.02, + "learning_rate": 4.0445216824182344e-05, + "loss": 0.0196, + "step": 9499, + "task_loss": 0.008282596245408058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11503365635871887, + "epoch": 9.02, + "learning_rate": 4.043683689863742e-05, + "loss": 0.1119, + "step": 9500, + "task_loss": 0.08395393192768097 + }, + { + "epoch": 9.02, + "eval_accuracy": 0.8910550458715596, + "eval_loss": 0.4567631483078003, + "eval_runtime": 18.0265, + "eval_samples_per_second": 48.373, + "eval_steps_per_second": 6.047, + "step": 9500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04205450415611267, + "epoch": 9.02, + "learning_rate": 4.042845416891673e-05, + "loss": 0.0486, + "step": 9501, + "task_loss": 0.10712815821170807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06236998736858368, + "epoch": 9.02, + "learning_rate": 4.042006863654303e-05, + "loss": 0.0662, + "step": 9502, + "task_loss": 0.10055098682641983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019925886765122414, + "epoch": 9.02, + "learning_rate": 4.041168030303961e-05, + "loss": 0.0187, + "step": 9503, + "task_loss": 0.008159628137946129 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06345760822296143, + "epoch": 9.03, + "learning_rate": 4.0403289169930235e-05, + "loss": 0.0664, + "step": 9504, + "task_loss": 0.09281642735004425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023364584892988205, + "epoch": 9.03, + "learning_rate": 4.03948952387392e-05, + "loss": 0.0272, + "step": 9505, + "task_loss": 0.06141924858093262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02729855477809906, + "epoch": 9.03, + "learning_rate": 4.03864985109913e-05, + "loss": 0.0263, + "step": 9506, + "task_loss": 0.016974736005067825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01486403401941061, + "epoch": 9.03, + "learning_rate": 4.0378098988211845e-05, + "loss": 0.0137, + "step": 9507, + "task_loss": 0.0035839397460222244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031053446233272552, + "epoch": 9.03, + "learning_rate": 4.036969667192665e-05, + "loss": 0.0616, + "step": 9508, + "task_loss": 0.3368293344974518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07761267572641373, + "epoch": 9.03, + "learning_rate": 4.036129156366203e-05, + "loss": 0.0834, + "step": 9509, + "task_loss": 0.13566073775291443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1153075098991394, + "epoch": 9.03, + "learning_rate": 4.0352883664944816e-05, + "loss": 0.11, + "step": 9510, + "task_loss": 0.06233564764261246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015643972903490067, + "epoch": 9.03, + "learning_rate": 4.034447297730234e-05, + "loss": 0.0146, + "step": 9511, + "task_loss": 0.005513627082109451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04660728946328163, + "epoch": 9.03, + "learning_rate": 4.033605950226246e-05, + "loss": 0.0524, + "step": 9512, + "task_loss": 0.10417984426021576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01021486520767212, + "epoch": 9.03, + "learning_rate": 4.03276432413535e-05, + "loss": 0.0097, + "step": 9513, + "task_loss": 0.004828939214348793 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030726449564099312, + "epoch": 9.04, + "learning_rate": 4.0319224196104334e-05, + "loss": 0.0285, + "step": 9514, + "task_loss": 0.00808459147810936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0746147632598877, + "epoch": 9.04, + "learning_rate": 4.031080236804431e-05, + "loss": 0.0731, + "step": 9515, + "task_loss": 0.05903128162026405 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06045572832226753, + "epoch": 9.04, + "learning_rate": 4.03023777587033e-05, + "loss": 0.073, + "step": 9516, + "task_loss": 0.18590320646762848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015560157597064972, + "epoch": 9.04, + "learning_rate": 4.029395036961168e-05, + "loss": 0.0148, + "step": 9517, + "task_loss": 0.007637334987521172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02795090712606907, + "epoch": 9.04, + "learning_rate": 4.028552020230031e-05, + "loss": 0.0278, + "step": 9518, + "task_loss": 0.026432598009705544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016976818442344666, + "epoch": 9.04, + "learning_rate": 4.0277087258300575e-05, + "loss": 0.0156, + "step": 9519, + "task_loss": 0.0029955413192510605 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010076291859149933, + "epoch": 9.04, + "learning_rate": 4.0268651539144374e-05, + "loss": 0.0094, + "step": 9520, + "task_loss": 0.003336844965815544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04456236958503723, + "epoch": 9.04, + "learning_rate": 4.026021304636408e-05, + "loss": 0.0541, + "step": 9521, + "task_loss": 0.13946330547332764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02801411598920822, + "epoch": 9.04, + "learning_rate": 4.0251771781492594e-05, + "loss": 0.0277, + "step": 9522, + "task_loss": 0.02517678588628769 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02441570535302162, + "epoch": 9.04, + "learning_rate": 4.0243327746063315e-05, + "loss": 0.0303, + "step": 9523, + "task_loss": 0.08282805979251862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03433768451213837, + "epoch": 9.04, + "learning_rate": 4.0234880941610134e-05, + "loss": 0.0363, + "step": 9524, + "task_loss": 0.05413848161697388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016913043335080147, + "epoch": 9.05, + "learning_rate": 4.022643136966746e-05, + "loss": 0.026, + "step": 9525, + "task_loss": 0.10786878317594528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07082056999206543, + "epoch": 9.05, + "learning_rate": 4.021797903177019e-05, + "loss": 0.0752, + "step": 9526, + "task_loss": 0.114901602268219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011534811928868294, + "epoch": 9.05, + "learning_rate": 4.0209523929453744e-05, + "loss": 0.0107, + "step": 9527, + "task_loss": 0.002887837588787079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018662162125110626, + "epoch": 9.05, + "learning_rate": 4.0201066064254026e-05, + "loss": 0.0173, + "step": 9528, + "task_loss": 0.004804331809282303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1188516914844513, + "epoch": 9.05, + "learning_rate": 4.019260543770745e-05, + "loss": 0.1143, + "step": 9529, + "task_loss": 0.07374230027198792 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022575611248612404, + "epoch": 9.05, + "learning_rate": 4.018414205135093e-05, + "loss": 0.0265, + "step": 9530, + "task_loss": 0.06145612150430679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012420643121004105, + "epoch": 9.05, + "learning_rate": 4.017567590672187e-05, + "loss": 0.0117, + "step": 9531, + "task_loss": 0.005104459822177887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032812122255563736, + "epoch": 9.05, + "learning_rate": 4.01672070053582e-05, + "loss": 0.0399, + "step": 9532, + "task_loss": 0.10327274352312088 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023092597723007202, + "epoch": 9.05, + "learning_rate": 4.015873534879833e-05, + "loss": 0.0245, + "step": 9533, + "task_loss": 0.03673432022333145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037473469972610474, + "epoch": 9.05, + "learning_rate": 4.015026093858119e-05, + "loss": 0.0472, + "step": 9534, + "task_loss": 0.13475218415260315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05353183299303055, + "epoch": 9.06, + "learning_rate": 4.014178377624617e-05, + "loss": 0.0514, + "step": 9535, + "task_loss": 0.032544951885938644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02789144590497017, + "epoch": 9.06, + "learning_rate": 4.013330386333321e-05, + "loss": 0.0303, + "step": 9536, + "task_loss": 0.052158139646053314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009651558473706245, + "epoch": 9.06, + "learning_rate": 4.012482120138272e-05, + "loss": 0.0091, + "step": 9537, + "task_loss": 0.003987642005085945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0357995480298996, + "epoch": 9.06, + "learning_rate": 4.011633579193561e-05, + "loss": 0.0406, + "step": 9538, + "task_loss": 0.08331019431352615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017858661711215973, + "epoch": 9.06, + "learning_rate": 4.010784763653331e-05, + "loss": 0.0252, + "step": 9539, + "task_loss": 0.09111450612545013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06131261587142944, + "epoch": 9.06, + "learning_rate": 4.0099356736717725e-05, + "loss": 0.0596, + "step": 9540, + "task_loss": 0.04372568055987358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021268391981720924, + "epoch": 9.06, + "learning_rate": 4.0090863094031274e-05, + "loss": 0.0294, + "step": 9541, + "task_loss": 0.10256145894527435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030995316803455353, + "epoch": 9.06, + "learning_rate": 4.008236671001686e-05, + "loss": 0.0286, + "step": 9542, + "task_loss": 0.007312217727303505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016386138275265694, + "epoch": 9.06, + "learning_rate": 4.0073867586217895e-05, + "loss": 0.0153, + "step": 9543, + "task_loss": 0.005437880754470825 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02716147154569626, + "epoch": 9.06, + "learning_rate": 4.006536572417828e-05, + "loss": 0.0256, + "step": 9544, + "task_loss": 0.011989755555987358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03949110209941864, + "epoch": 9.06, + "learning_rate": 4.0056861125442435e-05, + "loss": 0.0435, + "step": 9545, + "task_loss": 0.07948499917984009 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.060796599835157394, + "epoch": 9.07, + "learning_rate": 4.004835379155525e-05, + "loss": 0.0645, + "step": 9546, + "task_loss": 0.09797711670398712 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00800246186554432, + "epoch": 9.07, + "learning_rate": 4.003984372406212e-05, + "loss": 0.0078, + "step": 9547, + "task_loss": 0.0055456701666116714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01699589565396309, + "epoch": 9.07, + "learning_rate": 4.003133092450895e-05, + "loss": 0.0158, + "step": 9548, + "task_loss": 0.005228973925113678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0232367105782032, + "epoch": 9.07, + "learning_rate": 4.002281539444213e-05, + "loss": 0.03, + "step": 9549, + "task_loss": 0.09088429808616638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027070309966802597, + "epoch": 9.07, + "learning_rate": 4.001429713540853e-05, + "loss": 0.0249, + "step": 9550, + "task_loss": 0.005381651222705841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030805271118879318, + "epoch": 9.07, + "learning_rate": 4.000577614895555e-05, + "loss": 0.031, + "step": 9551, + "task_loss": 0.03288976103067398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15201908349990845, + "epoch": 9.07, + "learning_rate": 3.999725243663107e-05, + "loss": 0.1577, + "step": 9552, + "task_loss": 0.2092512547969818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03263195604085922, + "epoch": 9.07, + "learning_rate": 3.9988725999983456e-05, + "loss": 0.0373, + "step": 9553, + "task_loss": 0.07890980690717697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13403475284576416, + "epoch": 9.07, + "learning_rate": 3.998019684056158e-05, + "loss": 0.1317, + "step": 9554, + "task_loss": 0.11034642159938812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04724042862653732, + "epoch": 9.07, + "learning_rate": 3.99716649599148e-05, + "loss": 0.0483, + "step": 9555, + "task_loss": 0.058092519640922546 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0314975380897522, + "epoch": 9.08, + "learning_rate": 3.996313035959297e-05, + "loss": 0.037, + "step": 9556, + "task_loss": 0.0861077755689621 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01337359193712473, + "epoch": 9.08, + "learning_rate": 3.995459304114645e-05, + "loss": 0.0126, + "step": 9557, + "task_loss": 0.005379321053624153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02194923162460327, + "epoch": 9.08, + "learning_rate": 3.9946053006126086e-05, + "loss": 0.0286, + "step": 9558, + "task_loss": 0.08856151252985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021245867013931274, + "epoch": 9.08, + "learning_rate": 3.993751025608321e-05, + "loss": 0.0271, + "step": 9559, + "task_loss": 0.0795937329530716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025511398911476135, + "epoch": 9.08, + "learning_rate": 3.9928964792569655e-05, + "loss": 0.0235, + "step": 9560, + "task_loss": 0.005805861204862595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018126703798770905, + "epoch": 9.08, + "learning_rate": 3.9920416617137745e-05, + "loss": 0.017, + "step": 9561, + "task_loss": 0.007210768759250641 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018573222681879997, + "epoch": 9.08, + "learning_rate": 3.9911865731340306e-05, + "loss": 0.0233, + "step": 9562, + "task_loss": 0.06605537980794907 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023824790492653847, + "epoch": 9.08, + "learning_rate": 3.9903312136730634e-05, + "loss": 0.0321, + "step": 9563, + "task_loss": 0.10653051733970642 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10854936391115189, + "epoch": 9.08, + "learning_rate": 3.989475583486254e-05, + "loss": 0.105, + "step": 9564, + "task_loss": 0.07346247136592865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019099000841379166, + "epoch": 9.08, + "learning_rate": 3.988619682729032e-05, + "loss": 0.0268, + "step": 9565, + "task_loss": 0.09585027396678925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021908055990934372, + "epoch": 9.08, + "learning_rate": 3.987763511556874e-05, + "loss": 0.0296, + "step": 9566, + "task_loss": 0.09896925091743469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013894637115299702, + "epoch": 9.09, + "learning_rate": 3.98690707012531e-05, + "loss": 0.0128, + "step": 9567, + "task_loss": 0.0032347403466701508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03824283555150032, + "epoch": 9.09, + "learning_rate": 3.986050358589916e-05, + "loss": 0.0488, + "step": 9568, + "task_loss": 0.14340779185295105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03835334628820419, + "epoch": 9.09, + "learning_rate": 3.9851933771063166e-05, + "loss": 0.0349, + "step": 9569, + "task_loss": 0.003623614087700844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03116435743868351, + "epoch": 9.09, + "learning_rate": 3.9843361258301876e-05, + "loss": 0.0385, + "step": 9570, + "task_loss": 0.10502012073993683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04906230419874191, + "epoch": 9.09, + "learning_rate": 3.983478604917253e-05, + "loss": 0.0574, + "step": 9571, + "task_loss": 0.13289552927017212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04502825438976288, + "epoch": 9.09, + "learning_rate": 3.9826208145232855e-05, + "loss": 0.0679, + "step": 9572, + "task_loss": 0.273649126291275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036221615970134735, + "epoch": 9.09, + "learning_rate": 3.981762754804107e-05, + "loss": 0.0414, + "step": 9573, + "task_loss": 0.08760883659124374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05786776542663574, + "epoch": 9.09, + "learning_rate": 3.980904425915586e-05, + "loss": 0.0567, + "step": 9574, + "task_loss": 0.04585399478673935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0210392028093338, + "epoch": 9.09, + "learning_rate": 3.9800458280136453e-05, + "loss": 0.0198, + "step": 9575, + "task_loss": 0.008328957483172417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047202907502651215, + "epoch": 9.09, + "learning_rate": 3.979186961254252e-05, + "loss": 0.0497, + "step": 9576, + "task_loss": 0.07255647331476212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02497425302863121, + "epoch": 9.09, + "learning_rate": 3.9783278257934233e-05, + "loss": 0.0233, + "step": 9577, + "task_loss": 0.00862952508032322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031044363975524902, + "epoch": 9.1, + "learning_rate": 3.977468421787225e-05, + "loss": 0.0361, + "step": 9578, + "task_loss": 0.08184700459241867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023872077465057373, + "epoch": 9.1, + "learning_rate": 3.976608749391773e-05, + "loss": 0.0291, + "step": 9579, + "task_loss": 0.0763431191444397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021440522745251656, + "epoch": 9.1, + "learning_rate": 3.975748808763229e-05, + "loss": 0.0202, + "step": 9580, + "task_loss": 0.009024685248732567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017536599189043045, + "epoch": 9.1, + "learning_rate": 3.974888600057808e-05, + "loss": 0.0187, + "step": 9581, + "task_loss": 0.029022112488746643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03331207111477852, + "epoch": 9.1, + "learning_rate": 3.974028123431769e-05, + "loss": 0.0486, + "step": 9582, + "task_loss": 0.1862955540418625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01485717948526144, + "epoch": 9.1, + "learning_rate": 3.973167379041421e-05, + "loss": 0.0142, + "step": 9583, + "task_loss": 0.0080320555716753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03911569342017174, + "epoch": 9.1, + "learning_rate": 3.972306367043126e-05, + "loss": 0.043, + "step": 9584, + "task_loss": 0.07806295156478882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1487078070640564, + "epoch": 9.1, + "learning_rate": 3.971445087593288e-05, + "loss": 0.1381, + "step": 9585, + "task_loss": 0.04280809685587883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01248634047806263, + "epoch": 9.1, + "learning_rate": 3.970583540848363e-05, + "loss": 0.0155, + "step": 9586, + "task_loss": 0.04234500974416733 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015881624072790146, + "epoch": 9.1, + "learning_rate": 3.969721726964856e-05, + "loss": 0.0236, + "step": 9587, + "task_loss": 0.09326736629009247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028029238805174828, + "epoch": 9.11, + "learning_rate": 3.9688596460993176e-05, + "loss": 0.0332, + "step": 9588, + "task_loss": 0.07947677373886108 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018526611849665642, + "epoch": 9.11, + "learning_rate": 3.967997298408352e-05, + "loss": 0.0217, + "step": 9589, + "task_loss": 0.04995952919125557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02319607511162758, + "epoch": 9.11, + "learning_rate": 3.967134684048607e-05, + "loss": 0.0304, + "step": 9590, + "task_loss": 0.09573376178741455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03988368809223175, + "epoch": 9.11, + "learning_rate": 3.96627180317678e-05, + "loss": 0.0397, + "step": 9591, + "task_loss": 0.03814127668738365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030341364443302155, + "epoch": 9.11, + "learning_rate": 3.965408655949619e-05, + "loss": 0.0277, + "step": 9592, + "task_loss": 0.004290319979190826 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018847042694687843, + "epoch": 9.11, + "learning_rate": 3.964545242523917e-05, + "loss": 0.0174, + "step": 9593, + "task_loss": 0.004647746682167053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05705046281218529, + "epoch": 9.11, + "learning_rate": 3.9636815630565194e-05, + "loss": 0.055, + "step": 9594, + "task_loss": 0.03697717562317848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02136809565126896, + "epoch": 9.11, + "learning_rate": 3.962817617704317e-05, + "loss": 0.0265, + "step": 9595, + "task_loss": 0.07274884730577469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.2957301437854767, + "epoch": 9.11, + "learning_rate": 3.9619534066242485e-05, + "loss": 0.281, + "step": 9596, + "task_loss": 0.14882266521453857 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018483705818653107, + "epoch": 9.11, + "learning_rate": 3.961088929973303e-05, + "loss": 0.0266, + "step": 9597, + "task_loss": 0.09919779002666473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031163640320301056, + "epoch": 9.11, + "learning_rate": 3.960224187908518e-05, + "loss": 0.0309, + "step": 9598, + "task_loss": 0.028584491461515427 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0134575255215168, + "epoch": 9.12, + "learning_rate": 3.959359180586975e-05, + "loss": 0.0128, + "step": 9599, + "task_loss": 0.006391104310750961 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020069237798452377, + "epoch": 9.12, + "learning_rate": 3.958493908165809e-05, + "loss": 0.0185, + "step": 9600, + "task_loss": 0.004250597208738327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04818892478942871, + "epoch": 9.12, + "learning_rate": 3.9576283708022e-05, + "loss": 0.0446, + "step": 9601, + "task_loss": 0.012091221287846565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06722578406333923, + "epoch": 9.12, + "learning_rate": 3.956762568653378e-05, + "loss": 0.0683, + "step": 9602, + "task_loss": 0.07760586589574814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021520771086215973, + "epoch": 9.12, + "learning_rate": 3.95589650187662e-05, + "loss": 0.0263, + "step": 9603, + "task_loss": 0.06911582499742508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07241514325141907, + "epoch": 9.12, + "learning_rate": 3.95503017062925e-05, + "loss": 0.0696, + "step": 9604, + "task_loss": 0.04403474181890488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0638948380947113, + "epoch": 9.12, + "learning_rate": 3.954163575068643e-05, + "loss": 0.0619, + "step": 9605, + "task_loss": 0.04400301352143288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014446464367210865, + "epoch": 9.12, + "learning_rate": 3.953296715352218e-05, + "loss": 0.0136, + "step": 9606, + "task_loss": 0.0059725940227508545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016270997002720833, + "epoch": 9.12, + "learning_rate": 3.952429591637446e-05, + "loss": 0.0242, + "step": 9607, + "task_loss": 0.095741406083107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025010449811816216, + "epoch": 9.12, + "learning_rate": 3.951562204081845e-05, + "loss": 0.0283, + "step": 9608, + "task_loss": 0.057582780718803406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07090044021606445, + "epoch": 9.13, + "learning_rate": 3.950694552842977e-05, + "loss": 0.0913, + "step": 9609, + "task_loss": 0.2748509645462036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022294355556368828, + "epoch": 9.13, + "learning_rate": 3.949826638078457e-05, + "loss": 0.0367, + "step": 9610, + "task_loss": 0.16597087681293488 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02015480399131775, + "epoch": 9.13, + "learning_rate": 3.948958459945946e-05, + "loss": 0.0314, + "step": 9611, + "task_loss": 0.13252711296081543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06596086174249649, + "epoch": 9.13, + "learning_rate": 3.948090018603153e-05, + "loss": 0.0796, + "step": 9612, + "task_loss": 0.20231035351753235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018903814256191254, + "epoch": 9.13, + "learning_rate": 3.947221314207834e-05, + "loss": 0.0329, + "step": 9613, + "task_loss": 0.15914104878902435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03513910248875618, + "epoch": 9.13, + "learning_rate": 3.9463523469177935e-05, + "loss": 0.0369, + "step": 9614, + "task_loss": 0.05302010476589203 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04065433889627457, + "epoch": 9.13, + "learning_rate": 3.9454831168908824e-05, + "loss": 0.0387, + "step": 9615, + "task_loss": 0.021478936076164246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02802848257124424, + "epoch": 9.13, + "learning_rate": 3.9446136242850025e-05, + "loss": 0.0257, + "step": 9616, + "task_loss": 0.004691721871495247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09828861057758331, + "epoch": 9.13, + "learning_rate": 3.9437438692581e-05, + "loss": 0.0998, + "step": 9617, + "task_loss": 0.11373132467269897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05762851983308792, + "epoch": 9.13, + "learning_rate": 3.9428738519681704e-05, + "loss": 0.0569, + "step": 9618, + "task_loss": 0.04999490827322006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03731340914964676, + "epoch": 9.13, + "learning_rate": 3.942003572573257e-05, + "loss": 0.037, + "step": 9619, + "task_loss": 0.0341169647872448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023080751299858093, + "epoch": 9.14, + "learning_rate": 3.94113303123145e-05, + "loss": 0.0346, + "step": 9620, + "task_loss": 0.13836701214313507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014194730669260025, + "epoch": 9.14, + "learning_rate": 3.9402622281008874e-05, + "loss": 0.0199, + "step": 9621, + "task_loss": 0.07124481350183487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05054982379078865, + "epoch": 9.14, + "learning_rate": 3.939391163339754e-05, + "loss": 0.0559, + "step": 9622, + "task_loss": 0.10361889749765396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014216311275959015, + "epoch": 9.14, + "learning_rate": 3.9385198371062845e-05, + "loss": 0.0241, + "step": 9623, + "task_loss": 0.11282174289226532 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014683485962450504, + "epoch": 9.14, + "learning_rate": 3.937648249558758e-05, + "loss": 0.0136, + "step": 9624, + "task_loss": 0.0039719510823488235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016595659777522087, + "epoch": 9.14, + "learning_rate": 3.9367764008555034e-05, + "loss": 0.0156, + "step": 9625, + "task_loss": 0.006236037239432335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015016734600067139, + "epoch": 9.14, + "learning_rate": 3.9359042911548955e-05, + "loss": 0.014, + "step": 9626, + "task_loss": 0.005279116332530975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012623798102140427, + "epoch": 9.14, + "learning_rate": 3.935031920615358e-05, + "loss": 0.016, + "step": 9627, + "task_loss": 0.04630003124475479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05201908200979233, + "epoch": 9.14, + "learning_rate": 3.934159289395361e-05, + "loss": 0.0515, + "step": 9628, + "task_loss": 0.04690450802445412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02428932674229145, + "epoch": 9.14, + "learning_rate": 3.9332863976534225e-05, + "loss": 0.0316, + "step": 9629, + "task_loss": 0.0969204306602478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03481846675276756, + "epoch": 9.15, + "learning_rate": 3.9324132455481064e-05, + "loss": 0.0322, + "step": 9630, + "task_loss": 0.008588599041104317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011342018842697144, + "epoch": 9.15, + "learning_rate": 3.931539833238026e-05, + "loss": 0.0157, + "step": 9631, + "task_loss": 0.05512590333819389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04356681555509567, + "epoch": 9.15, + "learning_rate": 3.930666160881841e-05, + "loss": 0.0502, + "step": 9632, + "task_loss": 0.10991165041923523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03675740957260132, + "epoch": 9.15, + "learning_rate": 3.9297922286382573e-05, + "loss": 0.0385, + "step": 9633, + "task_loss": 0.05395536124706268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012938008643686771, + "epoch": 9.15, + "learning_rate": 3.928918036666029e-05, + "loss": 0.0123, + "step": 9634, + "task_loss": 0.006980478763580322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038404569029808044, + "epoch": 9.15, + "learning_rate": 3.928043585123957e-05, + "loss": 0.0375, + "step": 9635, + "task_loss": 0.029215719550848007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04972174018621445, + "epoch": 9.15, + "learning_rate": 3.927168874170891e-05, + "loss": 0.0643, + "step": 9636, + "task_loss": 0.19559511542320251 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0464179627597332, + "epoch": 9.15, + "learning_rate": 3.926293903965726e-05, + "loss": 0.0465, + "step": 9637, + "task_loss": 0.04701643064618111 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06579018384218216, + "epoch": 9.15, + "learning_rate": 3.925418674667405e-05, + "loss": 0.0745, + "step": 9638, + "task_loss": 0.1530209332704544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04052596539258957, + "epoch": 9.15, + "learning_rate": 3.924543186434915e-05, + "loss": 0.0394, + "step": 9639, + "task_loss": 0.029542801901698112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06306673586368561, + "epoch": 9.15, + "learning_rate": 3.923667439427295e-05, + "loss": 0.066, + "step": 9640, + "task_loss": 0.09248776733875275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04041620343923569, + "epoch": 9.16, + "learning_rate": 3.922791433803629e-05, + "loss": 0.0383, + "step": 9641, + "task_loss": 0.019168421626091003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015475798398256302, + "epoch": 9.16, + "learning_rate": 3.921915169723046e-05, + "loss": 0.0145, + "step": 9642, + "task_loss": 0.005340602248907089 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12808610498905182, + "epoch": 9.16, + "learning_rate": 3.921038647344725e-05, + "loss": 0.1262, + "step": 9643, + "task_loss": 0.10924281924962997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04638529568910599, + "epoch": 9.16, + "learning_rate": 3.920161866827889e-05, + "loss": 0.0483, + "step": 9644, + "task_loss": 0.06601787358522415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13820701837539673, + "epoch": 9.16, + "learning_rate": 3.9192848283318114e-05, + "loss": 0.1419, + "step": 9645, + "task_loss": 0.17469622194766998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0257248692214489, + "epoch": 9.16, + "learning_rate": 3.91840753201581e-05, + "loss": 0.0321, + "step": 9646, + "task_loss": 0.08985202014446259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057688090950250626, + "epoch": 9.16, + "learning_rate": 3.917529978039247e-05, + "loss": 0.0599, + "step": 9647, + "task_loss": 0.079569973051548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02995416149497032, + "epoch": 9.16, + "learning_rate": 3.9166521665615386e-05, + "loss": 0.0362, + "step": 9648, + "task_loss": 0.09234865754842758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014706909656524658, + "epoch": 9.16, + "learning_rate": 3.91577409774214e-05, + "loss": 0.0136, + "step": 9649, + "task_loss": 0.0036916378885507584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04012545198202133, + "epoch": 9.16, + "learning_rate": 3.9148957717405596e-05, + "loss": 0.0443, + "step": 9650, + "task_loss": 0.08144651353359222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03406708687543869, + "epoch": 9.17, + "learning_rate": 3.914017188716347e-05, + "loss": 0.0402, + "step": 9651, + "task_loss": 0.09540209174156189 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022918783128261566, + "epoch": 9.17, + "learning_rate": 3.913138348829102e-05, + "loss": 0.0348, + "step": 9652, + "task_loss": 0.14145193994045258 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013142495416104794, + "epoch": 9.17, + "learning_rate": 3.91225925223847e-05, + "loss": 0.0121, + "step": 9653, + "task_loss": 0.0028307754546403885 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036516182124614716, + "epoch": 9.17, + "learning_rate": 3.911379899104144e-05, + "loss": 0.0346, + "step": 9654, + "task_loss": 0.016904115676879883 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04557647556066513, + "epoch": 9.17, + "learning_rate": 3.910500289585862e-05, + "loss": 0.045, + "step": 9655, + "task_loss": 0.04023095220327377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01984507404267788, + "epoch": 9.17, + "learning_rate": 3.90962042384341e-05, + "loss": 0.0186, + "step": 9656, + "task_loss": 0.0074311550706624985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0343051478266716, + "epoch": 9.17, + "learning_rate": 3.908740302036618e-05, + "loss": 0.0397, + "step": 9657, + "task_loss": 0.08872069418430328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024765335023403168, + "epoch": 9.17, + "learning_rate": 3.907859924325366e-05, + "loss": 0.0381, + "step": 9658, + "task_loss": 0.15835905075073242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12249832600355148, + "epoch": 9.17, + "learning_rate": 3.906979290869578e-05, + "loss": 0.1308, + "step": 9659, + "task_loss": 0.20595303177833557 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012475738301873207, + "epoch": 9.17, + "learning_rate": 3.9060984018292267e-05, + "loss": 0.0118, + "step": 9660, + "task_loss": 0.0056333523243665695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023209767416119576, + "epoch": 9.17, + "learning_rate": 3.905217257364328e-05, + "loss": 0.0386, + "step": 9661, + "task_loss": 0.17690522968769073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04662572965025902, + "epoch": 9.18, + "learning_rate": 3.904335857634948e-05, + "loss": 0.0445, + "step": 9662, + "task_loss": 0.0256513562053442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04135457053780556, + "epoch": 9.18, + "learning_rate": 3.9034542028011944e-05, + "loss": 0.0443, + "step": 9663, + "task_loss": 0.07109534740447998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036145225167274475, + "epoch": 9.18, + "learning_rate": 3.902572293023227e-05, + "loss": 0.0458, + "step": 9664, + "task_loss": 0.13244393467903137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03712350130081177, + "epoch": 9.18, + "learning_rate": 3.9016901284612474e-05, + "loss": 0.0569, + "step": 9665, + "task_loss": 0.23489037156105042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011418421752750874, + "epoch": 9.18, + "learning_rate": 3.9008077092755055e-05, + "loss": 0.0162, + "step": 9666, + "task_loss": 0.058784160763025284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08879618346691132, + "epoch": 9.18, + "learning_rate": 3.899925035626296e-05, + "loss": 0.0833, + "step": 9667, + "task_loss": 0.03357243910431862 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10656376928091049, + "epoch": 9.18, + "learning_rate": 3.899042107673962e-05, + "loss": 0.1155, + "step": 9668, + "task_loss": 0.19606941938400269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02124224603176117, + "epoch": 9.18, + "learning_rate": 3.898158925578893e-05, + "loss": 0.0316, + "step": 9669, + "task_loss": 0.12506425380706787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023364577442407608, + "epoch": 9.18, + "learning_rate": 3.89727548950152e-05, + "loss": 0.0221, + "step": 9670, + "task_loss": 0.01049577072262764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.054638948291540146, + "epoch": 9.18, + "learning_rate": 3.8963917996023245e-05, + "loss": 0.0538, + "step": 9671, + "task_loss": 0.04668677970767021 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03254498541355133, + "epoch": 9.19, + "learning_rate": 3.8955078560418345e-05, + "loss": 0.0449, + "step": 9672, + "task_loss": 0.15559810400009155 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020883910357952118, + "epoch": 9.19, + "learning_rate": 3.894623658980622e-05, + "loss": 0.0193, + "step": 9673, + "task_loss": 0.005325049161911011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1067768931388855, + "epoch": 9.19, + "learning_rate": 3.8937392085793036e-05, + "loss": 0.1221, + "step": 9674, + "task_loss": 0.2597641348838806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018741261214017868, + "epoch": 9.19, + "learning_rate": 3.892854504998546e-05, + "loss": 0.029, + "step": 9675, + "task_loss": 0.12132866680622101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011632721871137619, + "epoch": 9.19, + "learning_rate": 3.891969548399061e-05, + "loss": 0.0203, + "step": 9676, + "task_loss": 0.09824814647436142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017414584755897522, + "epoch": 9.19, + "learning_rate": 3.891084338941603e-05, + "loss": 0.0162, + "step": 9677, + "task_loss": 0.00522034615278244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012747041881084442, + "epoch": 9.19, + "learning_rate": 3.8901988767869744e-05, + "loss": 0.0118, + "step": 9678, + "task_loss": 0.0028500892221927643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.055840395390987396, + "epoch": 9.19, + "learning_rate": 3.8893131620960254e-05, + "loss": 0.057, + "step": 9679, + "task_loss": 0.06704328209161758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03462572395801544, + "epoch": 9.19, + "learning_rate": 3.88842719502965e-05, + "loss": 0.0367, + "step": 9680, + "task_loss": 0.05576511472463608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.050681374967098236, + "epoch": 9.19, + "learning_rate": 3.887540975748787e-05, + "loss": 0.0543, + "step": 9681, + "task_loss": 0.08639328181743622 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029797300696372986, + "epoch": 9.19, + "learning_rate": 3.8866545044144234e-05, + "loss": 0.0337, + "step": 9682, + "task_loss": 0.06878768652677536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014516827650368214, + "epoch": 9.2, + "learning_rate": 3.885767781187591e-05, + "loss": 0.0236, + "step": 9683, + "task_loss": 0.10563208907842636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013084099628031254, + "epoch": 9.2, + "learning_rate": 3.884880806229367e-05, + "loss": 0.0139, + "step": 9684, + "task_loss": 0.020864391699433327 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022157765924930573, + "epoch": 9.2, + "learning_rate": 3.883993579700875e-05, + "loss": 0.0206, + "step": 9685, + "task_loss": 0.006276823580265045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059577975422143936, + "epoch": 9.2, + "learning_rate": 3.883106101763285e-05, + "loss": 0.0571, + "step": 9686, + "task_loss": 0.03433872014284134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04622458666563034, + "epoch": 9.2, + "learning_rate": 3.882218372577809e-05, + "loss": 0.06, + "step": 9687, + "task_loss": 0.18399886786937714 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030847573652863503, + "epoch": 9.2, + "learning_rate": 3.881330392305709e-05, + "loss": 0.0407, + "step": 9688, + "task_loss": 0.1292884647846222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10208854079246521, + "epoch": 9.2, + "learning_rate": 3.8804421611082916e-05, + "loss": 0.1025, + "step": 9689, + "task_loss": 0.10571222007274628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07040687650442123, + "epoch": 9.2, + "learning_rate": 3.8795536791469066e-05, + "loss": 0.0801, + "step": 9690, + "task_loss": 0.16708990931510925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022444941103458405, + "epoch": 9.2, + "learning_rate": 3.8786649465829516e-05, + "loss": 0.0288, + "step": 9691, + "task_loss": 0.08632338047027588 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014007965102791786, + "epoch": 9.2, + "learning_rate": 3.8777759635778696e-05, + "loss": 0.0129, + "step": 9692, + "task_loss": 0.003078687936067581 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03719889000058174, + "epoch": 9.21, + "learning_rate": 3.876886730293149e-05, + "loss": 0.0361, + "step": 9693, + "task_loss": 0.0266589242964983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01961357146501541, + "epoch": 9.21, + "learning_rate": 3.8759972468903215e-05, + "loss": 0.0255, + "step": 9694, + "task_loss": 0.07875269651412964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021031856536865234, + "epoch": 9.21, + "learning_rate": 3.875107513530968e-05, + "loss": 0.0209, + "step": 9695, + "task_loss": 0.01938748173415661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0264435987919569, + "epoch": 9.21, + "learning_rate": 3.874217530376711e-05, + "loss": 0.0375, + "step": 9696, + "task_loss": 0.13744567334651947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07807254046201706, + "epoch": 9.21, + "learning_rate": 3.873327297589223e-05, + "loss": 0.0742, + "step": 9697, + "task_loss": 0.03972852602601051 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024312328547239304, + "epoch": 9.21, + "learning_rate": 3.8724368153302166e-05, + "loss": 0.0274, + "step": 9698, + "task_loss": 0.055370982736349106 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01522772666066885, + "epoch": 9.21, + "learning_rate": 3.871546083761453e-05, + "loss": 0.0185, + "step": 9699, + "task_loss": 0.04807128384709358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10017004609107971, + "epoch": 9.21, + "learning_rate": 3.870655103044738e-05, + "loss": 0.1018, + "step": 9700, + "task_loss": 0.11637624353170395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04961914196610451, + "epoch": 9.21, + "learning_rate": 3.8697638733419216e-05, + "loss": 0.0533, + "step": 9701, + "task_loss": 0.086252860724926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06641680747270584, + "epoch": 9.21, + "learning_rate": 3.8688723948149014e-05, + "loss": 0.0691, + "step": 9702, + "task_loss": 0.0931624248623848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025418289005756378, + "epoch": 9.21, + "learning_rate": 3.867980667625618e-05, + "loss": 0.0309, + "step": 9703, + "task_loss": 0.07991056889295578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017918124794960022, + "epoch": 9.22, + "learning_rate": 3.867088691936058e-05, + "loss": 0.0227, + "step": 9704, + "task_loss": 0.06607376039028168 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051767975091934204, + "epoch": 9.22, + "learning_rate": 3.8661964679082535e-05, + "loss": 0.0597, + "step": 9705, + "task_loss": 0.13116154074668884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030053768306970596, + "epoch": 9.22, + "learning_rate": 3.8653039957042806e-05, + "loss": 0.0285, + "step": 9706, + "task_loss": 0.014347141608595848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010007740929722786, + "epoch": 9.22, + "learning_rate": 3.8644112754862614e-05, + "loss": 0.0093, + "step": 9707, + "task_loss": 0.002756282687187195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03509853407740593, + "epoch": 9.22, + "learning_rate": 3.8635183074163636e-05, + "loss": 0.038, + "step": 9708, + "task_loss": 0.06392055749893188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02037603035569191, + "epoch": 9.22, + "learning_rate": 3.862625091656797e-05, + "loss": 0.0211, + "step": 9709, + "task_loss": 0.02797846868634224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04154079779982567, + "epoch": 9.22, + "learning_rate": 3.861731628369822e-05, + "loss": 0.0651, + "step": 9710, + "task_loss": 0.2771263122558594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028145533055067062, + "epoch": 9.22, + "learning_rate": 3.8608379177177375e-05, + "loss": 0.042, + "step": 9711, + "task_loss": 0.1668614000082016 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018877774477005005, + "epoch": 9.22, + "learning_rate": 3.8599439598628916e-05, + "loss": 0.0314, + "step": 9712, + "task_loss": 0.14435160160064697 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.128778338432312, + "epoch": 9.22, + "learning_rate": 3.8590497549676753e-05, + "loss": 0.1268, + "step": 9713, + "task_loss": 0.1085672602057457 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02545616589486599, + "epoch": 9.23, + "learning_rate": 3.858155303194526e-05, + "loss": 0.0276, + "step": 9714, + "task_loss": 0.0471004918217659 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025828856974840164, + "epoch": 9.23, + "learning_rate": 3.8572606047059254e-05, + "loss": 0.0246, + "step": 9715, + "task_loss": 0.01345803216099739 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018282314762473106, + "epoch": 9.23, + "learning_rate": 3.856365659664399e-05, + "loss": 0.0225, + "step": 9716, + "task_loss": 0.06006855517625809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0179976187646389, + "epoch": 9.23, + "learning_rate": 3.855470468232518e-05, + "loss": 0.0237, + "step": 9717, + "task_loss": 0.07452400773763657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017134424299001694, + "epoch": 9.23, + "learning_rate": 3.854575030572898e-05, + "loss": 0.0229, + "step": 9718, + "task_loss": 0.0750466138124466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015787985175848007, + "epoch": 9.23, + "learning_rate": 3.853679346848201e-05, + "loss": 0.0209, + "step": 9719, + "task_loss": 0.06708446890115738 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023376762866973877, + "epoch": 9.23, + "learning_rate": 3.8527834172211306e-05, + "loss": 0.0362, + "step": 9720, + "task_loss": 0.15146197378635406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017823493108153343, + "epoch": 9.23, + "learning_rate": 3.851887241854438e-05, + "loss": 0.0172, + "step": 9721, + "task_loss": 0.011508205905556679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01219139527529478, + "epoch": 9.23, + "learning_rate": 3.850990820910917e-05, + "loss": 0.0114, + "step": 9722, + "task_loss": 0.003914957866072655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021136948838829994, + "epoch": 9.23, + "learning_rate": 3.8500941545534065e-05, + "loss": 0.0259, + "step": 9723, + "task_loss": 0.06834466755390167 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05498252436518669, + "epoch": 9.23, + "learning_rate": 3.849197242944791e-05, + "loss": 0.0571, + "step": 9724, + "task_loss": 0.07627555727958679 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03645971044898033, + "epoch": 9.24, + "learning_rate": 3.8483000862479986e-05, + "loss": 0.0411, + "step": 9725, + "task_loss": 0.08248203247785568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023510009050369263, + "epoch": 9.24, + "learning_rate": 3.8474026846260015e-05, + "loss": 0.0419, + "step": 9726, + "task_loss": 0.20696833729743958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037108682096004486, + "epoch": 9.24, + "learning_rate": 3.846505038241818e-05, + "loss": 0.0336, + "step": 9727, + "task_loss": 0.0021884366869926453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05788455903530121, + "epoch": 9.24, + "learning_rate": 3.84560714725851e-05, + "loss": 0.0531, + "step": 9728, + "task_loss": 0.009828178212046623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01626589521765709, + "epoch": 9.24, + "learning_rate": 3.8447090118391814e-05, + "loss": 0.0339, + "step": 9729, + "task_loss": 0.19226785004138947 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044995106756687164, + "epoch": 9.24, + "learning_rate": 3.8438106321469864e-05, + "loss": 0.0445, + "step": 9730, + "task_loss": 0.03956909477710724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018232377246022224, + "epoch": 9.24, + "learning_rate": 3.842912008345117e-05, + "loss": 0.0229, + "step": 9731, + "task_loss": 0.0647912323474884 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01168222725391388, + "epoch": 9.24, + "learning_rate": 3.842013140596815e-05, + "loss": 0.0143, + "step": 9732, + "task_loss": 0.0380130261182785 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011876760981976986, + "epoch": 9.24, + "learning_rate": 3.841114029065362e-05, + "loss": 0.011, + "step": 9733, + "task_loss": 0.003131164237856865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05818028748035431, + "epoch": 9.24, + "learning_rate": 3.8402146739140874e-05, + "loss": 0.0713, + "step": 9734, + "task_loss": 0.1889064908027649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016233699396252632, + "epoch": 9.25, + "learning_rate": 3.8393150753063614e-05, + "loss": 0.0202, + "step": 9735, + "task_loss": 0.05590314790606499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06847208738327026, + "epoch": 9.25, + "learning_rate": 3.838415233405603e-05, + "loss": 0.0667, + "step": 9736, + "task_loss": 0.050349440425634384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022901874035596848, + "epoch": 9.25, + "learning_rate": 3.837515148375271e-05, + "loss": 0.021, + "step": 9737, + "task_loss": 0.003996755927801132 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02838345803320408, + "epoch": 9.25, + "learning_rate": 3.836614820378871e-05, + "loss": 0.0341, + "step": 9738, + "task_loss": 0.08570387959480286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016883879899978638, + "epoch": 9.25, + "learning_rate": 3.835714249579952e-05, + "loss": 0.0167, + "step": 9739, + "task_loss": 0.015314383432269096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03494098410010338, + "epoch": 9.25, + "learning_rate": 3.8348134361421064e-05, + "loss": 0.0337, + "step": 9740, + "task_loss": 0.022901371121406555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016907619312405586, + "epoch": 9.25, + "learning_rate": 3.8339123802289716e-05, + "loss": 0.0157, + "step": 9741, + "task_loss": 0.00507272407412529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043327562510967255, + "epoch": 9.25, + "learning_rate": 3.8330110820042285e-05, + "loss": 0.0487, + "step": 9742, + "task_loss": 0.09731481969356537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12834414839744568, + "epoch": 9.25, + "learning_rate": 3.8321095416316024e-05, + "loss": 0.136, + "step": 9743, + "task_loss": 0.20463687181472778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032464127987623215, + "epoch": 9.25, + "learning_rate": 3.831207759274863e-05, + "loss": 0.0303, + "step": 9744, + "task_loss": 0.011053359135985374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022374771535396576, + "epoch": 9.25, + "learning_rate": 3.8303057350978224e-05, + "loss": 0.0216, + "step": 9745, + "task_loss": 0.014581706374883652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.045660026371479034, + "epoch": 9.26, + "learning_rate": 3.829403469264339e-05, + "loss": 0.0425, + "step": 9746, + "task_loss": 0.013828214257955551 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023104814812541008, + "epoch": 9.26, + "learning_rate": 3.828500961938313e-05, + "loss": 0.0377, + "step": 9747, + "task_loss": 0.1689058542251587 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010014375671744347, + "epoch": 9.26, + "learning_rate": 3.827598213283688e-05, + "loss": 0.0095, + "step": 9748, + "task_loss": 0.004870768636465073 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11848193407058716, + "epoch": 9.26, + "learning_rate": 3.8266952234644545e-05, + "loss": 0.1292, + "step": 9749, + "task_loss": 0.22540438175201416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12723466753959656, + "epoch": 9.26, + "learning_rate": 3.825791992644644e-05, + "loss": 0.1362, + "step": 9750, + "task_loss": 0.21650384366512299 + }, + { + "epoch": 9.26, + "eval_accuracy": 0.9013761467889908, + "eval_loss": 0.4256434738636017, + "eval_runtime": 17.9197, + "eval_samples_per_second": 48.662, + "eval_steps_per_second": 6.083, + "step": 9750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02083965763449669, + "epoch": 9.26, + "learning_rate": 3.824888520988333e-05, + "loss": 0.0192, + "step": 9751, + "task_loss": 0.004488172009587288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025724343955516815, + "epoch": 9.26, + "learning_rate": 3.823984808659641e-05, + "loss": 0.0374, + "step": 9752, + "task_loss": 0.14217713475227356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012440409511327744, + "epoch": 9.26, + "learning_rate": 3.8230808558227335e-05, + "loss": 0.0116, + "step": 9753, + "task_loss": 0.0036589261144399643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009372575208544731, + "epoch": 9.26, + "learning_rate": 3.8221766626418155e-05, + "loss": 0.009, + "step": 9754, + "task_loss": 0.005342619493603706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032062187790870667, + "epoch": 9.26, + "learning_rate": 3.821272229281139e-05, + "loss": 0.0349, + "step": 9755, + "task_loss": 0.06090862303972244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05444857478141785, + "epoch": 9.26, + "learning_rate": 3.820367555904999e-05, + "loss": 0.0574, + "step": 9756, + "task_loss": 0.08438717573881149 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02243727631866932, + "epoch": 9.27, + "learning_rate": 3.819462642677733e-05, + "loss": 0.0375, + "step": 9757, + "task_loss": 0.17298638820648193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019842922687530518, + "epoch": 9.27, + "learning_rate": 3.818557489763724e-05, + "loss": 0.0347, + "step": 9758, + "task_loss": 0.16849234700202942 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014697995036840439, + "epoch": 9.27, + "learning_rate": 3.817652097327396e-05, + "loss": 0.0137, + "step": 9759, + "task_loss": 0.004293628036975861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0161375030875206, + "epoch": 9.27, + "learning_rate": 3.81674646553322e-05, + "loss": 0.0251, + "step": 9760, + "task_loss": 0.10576816648244858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025952016934752464, + "epoch": 9.27, + "learning_rate": 3.815840594545706e-05, + "loss": 0.0432, + "step": 9761, + "task_loss": 0.19875141978263855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030038634315133095, + "epoch": 9.27, + "learning_rate": 3.814934484529411e-05, + "loss": 0.0387, + "step": 9762, + "task_loss": 0.11658079922199249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1559152901172638, + "epoch": 9.27, + "learning_rate": 3.8140281356489346e-05, + "loss": 0.1487, + "step": 9763, + "task_loss": 0.08386341482400894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02892562374472618, + "epoch": 9.27, + "learning_rate": 3.8131215480689184e-05, + "loss": 0.0478, + "step": 9764, + "task_loss": 0.21779313683509827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021166235208511353, + "epoch": 9.27, + "learning_rate": 3.812214721954049e-05, + "loss": 0.0218, + "step": 9765, + "task_loss": 0.027633680030703545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021776802837848663, + "epoch": 9.27, + "learning_rate": 3.811307657469055e-05, + "loss": 0.0203, + "step": 9766, + "task_loss": 0.007460303604602814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022118225693702698, + "epoch": 9.28, + "learning_rate": 3.8104003547787105e-05, + "loss": 0.0211, + "step": 9767, + "task_loss": 0.011582162231206894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012279561720788479, + "epoch": 9.28, + "learning_rate": 3.809492814047831e-05, + "loss": 0.0181, + "step": 9768, + "task_loss": 0.07050621509552002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022169344127178192, + "epoch": 9.28, + "learning_rate": 3.8085850354412745e-05, + "loss": 0.0205, + "step": 9769, + "task_loss": 0.005162643268704414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032863132655620575, + "epoch": 9.28, + "learning_rate": 3.807677019123944e-05, + "loss": 0.0387, + "step": 9770, + "task_loss": 0.09106673300266266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0191971268504858, + "epoch": 9.28, + "learning_rate": 3.806768765260785e-05, + "loss": 0.021, + "step": 9771, + "task_loss": 0.037707261741161346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01197386346757412, + "epoch": 9.28, + "learning_rate": 3.805860274016787e-05, + "loss": 0.0118, + "step": 9772, + "task_loss": 0.00973932072520256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024311240762472153, + "epoch": 9.28, + "learning_rate": 3.8049515455569816e-05, + "loss": 0.0404, + "step": 9773, + "task_loss": 0.1852773129940033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01635361835360527, + "epoch": 9.28, + "learning_rate": 3.804042580046442e-05, + "loss": 0.0222, + "step": 9774, + "task_loss": 0.07465977221727371 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023430045694112778, + "epoch": 9.28, + "learning_rate": 3.803133377650288e-05, + "loss": 0.0294, + "step": 9775, + "task_loss": 0.08316051959991455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01706859841942787, + "epoch": 9.28, + "learning_rate": 3.80222393853368e-05, + "loss": 0.0314, + "step": 9776, + "task_loss": 0.1608736664056778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01352263055741787, + "epoch": 9.28, + "learning_rate": 3.801314262861822e-05, + "loss": 0.0127, + "step": 9777, + "task_loss": 0.005207255482673645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039057813584804535, + "epoch": 9.29, + "learning_rate": 3.800404350799961e-05, + "loss": 0.036, + "step": 9778, + "task_loss": 0.008711079135537148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016933217644691467, + "epoch": 9.29, + "learning_rate": 3.799494202513386e-05, + "loss": 0.0157, + "step": 9779, + "task_loss": 0.004555156454443932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07299914956092834, + "epoch": 9.29, + "learning_rate": 3.798583818167432e-05, + "loss": 0.0931, + "step": 9780, + "task_loss": 0.27392470836639404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012407921254634857, + "epoch": 9.29, + "learning_rate": 3.797673197927473e-05, + "loss": 0.0119, + "step": 9781, + "task_loss": 0.006941312924027443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019861984997987747, + "epoch": 9.29, + "learning_rate": 3.796762341958927e-05, + "loss": 0.0185, + "step": 9782, + "task_loss": 0.0062296707183122635 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03457757458090782, + "epoch": 9.29, + "learning_rate": 3.795851250427257e-05, + "loss": 0.0449, + "step": 9783, + "task_loss": 0.1378023773431778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07008830457925797, + "epoch": 9.29, + "learning_rate": 3.794939923497967e-05, + "loss": 0.0876, + "step": 9784, + "task_loss": 0.2450743168592453 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020512813702225685, + "epoch": 9.29, + "learning_rate": 3.794028361336603e-05, + "loss": 0.0237, + "step": 9785, + "task_loss": 0.05278385803103447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05949783697724342, + "epoch": 9.29, + "learning_rate": 3.793116564108754e-05, + "loss": 0.0719, + "step": 9786, + "task_loss": 0.1833191066980362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012088644318282604, + "epoch": 9.29, + "learning_rate": 3.7922045319800545e-05, + "loss": 0.0161, + "step": 9787, + "task_loss": 0.052241381257772446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0391070693731308, + "epoch": 9.3, + "learning_rate": 3.7912922651161783e-05, + "loss": 0.0505, + "step": 9788, + "task_loss": 0.15337170660495758 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01692778989672661, + "epoch": 9.3, + "learning_rate": 3.790379763682844e-05, + "loss": 0.0157, + "step": 9789, + "task_loss": 0.004635356366634369 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15806353092193604, + "epoch": 9.3, + "learning_rate": 3.7894670278458096e-05, + "loss": 0.152, + "step": 9790, + "task_loss": 0.09754717350006104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12203462421894073, + "epoch": 9.3, + "learning_rate": 3.7885540577708804e-05, + "loss": 0.1233, + "step": 9791, + "task_loss": 0.13422806560993195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07689625769853592, + "epoch": 9.3, + "learning_rate": 3.7876408536239006e-05, + "loss": 0.0828, + "step": 9792, + "task_loss": 0.1364186704158783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03227274864912033, + "epoch": 9.3, + "learning_rate": 3.7867274155707585e-05, + "loss": 0.0374, + "step": 9793, + "task_loss": 0.08388587832450867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03347526490688324, + "epoch": 9.3, + "learning_rate": 3.7858137437773845e-05, + "loss": 0.0504, + "step": 9794, + "task_loss": 0.2030281275510788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02590757980942726, + "epoch": 9.3, + "learning_rate": 3.784899838409751e-05, + "loss": 0.0291, + "step": 9795, + "task_loss": 0.05813925340771675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024779632687568665, + "epoch": 9.3, + "learning_rate": 3.783985699633874e-05, + "loss": 0.0318, + "step": 9796, + "task_loss": 0.09515906125307083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12447907775640488, + "epoch": 9.3, + "learning_rate": 3.783071327615811e-05, + "loss": 0.1319, + "step": 9797, + "task_loss": 0.19891595840454102 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06262899935245514, + "epoch": 9.3, + "learning_rate": 3.7821567225216615e-05, + "loss": 0.0599, + "step": 9798, + "task_loss": 0.035038989037275314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03002050705254078, + "epoch": 9.31, + "learning_rate": 3.781241884517569e-05, + "loss": 0.0338, + "step": 9799, + "task_loss": 0.06747624278068542 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04927913099527359, + "epoch": 9.31, + "learning_rate": 3.780326813769717e-05, + "loss": 0.0507, + "step": 9800, + "task_loss": 0.06330694258213043 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03860379010438919, + "epoch": 9.31, + "learning_rate": 3.779411510444334e-05, + "loss": 0.0354, + "step": 9801, + "task_loss": 0.006218938156962395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028020869940519333, + "epoch": 9.31, + "learning_rate": 3.778495974707688e-05, + "loss": 0.0354, + "step": 9802, + "task_loss": 0.10227955132722855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008688807487487793, + "epoch": 9.31, + "learning_rate": 3.7775802067260905e-05, + "loss": 0.0106, + "step": 9803, + "task_loss": 0.02761073224246502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.15427309274673462, + "epoch": 9.31, + "learning_rate": 3.776664206665896e-05, + "loss": 0.1546, + "step": 9804, + "task_loss": 0.15730072557926178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02751828357577324, + "epoch": 9.31, + "learning_rate": 3.7757479746935e-05, + "loss": 0.0351, + "step": 9805, + "task_loss": 0.10342276096343994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02257220633327961, + "epoch": 9.31, + "learning_rate": 3.77483151097534e-05, + "loss": 0.0333, + "step": 9806, + "task_loss": 0.1300220787525177 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16032102704048157, + "epoch": 9.31, + "learning_rate": 3.773914815677897e-05, + "loss": 0.154, + "step": 9807, + "task_loss": 0.09661975502967834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039088841527700424, + "epoch": 9.31, + "learning_rate": 3.7729978889676915e-05, + "loss": 0.0461, + "step": 9808, + "task_loss": 0.10870218276977539 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020086947828531265, + "epoch": 9.32, + "learning_rate": 3.7720807310112896e-05, + "loss": 0.0218, + "step": 9809, + "task_loss": 0.03744789958000183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012433727271854877, + "epoch": 9.32, + "learning_rate": 3.7711633419752954e-05, + "loss": 0.0208, + "step": 9810, + "task_loss": 0.09626266360282898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020316528156399727, + "epoch": 9.32, + "learning_rate": 3.7702457220263595e-05, + "loss": 0.0277, + "step": 9811, + "task_loss": 0.09421400725841522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06558854132890701, + "epoch": 9.32, + "learning_rate": 3.76932787133117e-05, + "loss": 0.0822, + "step": 9812, + "task_loss": 0.23205044865608215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05739504098892212, + "epoch": 9.32, + "learning_rate": 3.768409790056459e-05, + "loss": 0.0695, + "step": 9813, + "task_loss": 0.1781667172908783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09154346585273743, + "epoch": 9.32, + "learning_rate": 3.7674914783690006e-05, + "loss": 0.0877, + "step": 9814, + "task_loss": 0.05328264459967613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04409680515527725, + "epoch": 9.32, + "learning_rate": 3.7665729364356115e-05, + "loss": 0.0559, + "step": 9815, + "task_loss": 0.16190242767333984 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027593711391091347, + "epoch": 9.32, + "learning_rate": 3.7656541644231494e-05, + "loss": 0.0265, + "step": 9816, + "task_loss": 0.01616600714623928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.042008671909570694, + "epoch": 9.32, + "learning_rate": 3.764735162498512e-05, + "loss": 0.0398, + "step": 9817, + "task_loss": 0.01970026269555092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08180496841669083, + "epoch": 9.32, + "learning_rate": 3.763815930828641e-05, + "loss": 0.0787, + "step": 9818, + "task_loss": 0.05091682821512222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019493555650115013, + "epoch": 9.32, + "learning_rate": 3.76289646958052e-05, + "loss": 0.0245, + "step": 9819, + "task_loss": 0.06980308145284653 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04548114538192749, + "epoch": 9.33, + "learning_rate": 3.761976778921173e-05, + "loss": 0.0504, + "step": 9820, + "task_loss": 0.09444499015808105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0342935174703598, + "epoch": 9.33, + "learning_rate": 3.761056859017667e-05, + "loss": 0.0368, + "step": 9821, + "task_loss": 0.05982755869626999 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029260210692882538, + "epoch": 9.33, + "learning_rate": 3.7601367100371085e-05, + "loss": 0.0385, + "step": 9822, + "task_loss": 0.12197436392307281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02063072845339775, + "epoch": 9.33, + "learning_rate": 3.759216332146649e-05, + "loss": 0.0332, + "step": 9823, + "task_loss": 0.1467389613389969 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026403427124023438, + "epoch": 9.33, + "learning_rate": 3.7582957255134765e-05, + "loss": 0.0356, + "step": 9824, + "task_loss": 0.11809330433607101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009067846462130547, + "epoch": 9.33, + "learning_rate": 3.7573748903048266e-05, + "loss": 0.0086, + "step": 9825, + "task_loss": 0.004426542669534683 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0586862787604332, + "epoch": 9.33, + "learning_rate": 3.756453826687972e-05, + "loss": 0.0689, + "step": 9826, + "task_loss": 0.1604919135570526 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1495116949081421, + "epoch": 9.33, + "learning_rate": 3.755532534830229e-05, + "loss": 0.1435, + "step": 9827, + "task_loss": 0.08945684134960175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.056340716779232025, + "epoch": 9.33, + "learning_rate": 3.7546110148989535e-05, + "loss": 0.0687, + "step": 9828, + "task_loss": 0.1797863095998764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03908500820398331, + "epoch": 9.33, + "learning_rate": 3.7536892670615454e-05, + "loss": 0.044, + "step": 9829, + "task_loss": 0.0877419114112854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012538935989141464, + "epoch": 9.34, + "learning_rate": 3.752767291485444e-05, + "loss": 0.0195, + "step": 9830, + "task_loss": 0.08169474452733994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0177429411560297, + "epoch": 9.34, + "learning_rate": 3.7518450883381306e-05, + "loss": 0.017, + "step": 9831, + "task_loss": 0.010471796616911888 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012262466363608837, + "epoch": 9.34, + "learning_rate": 3.750922657787128e-05, + "loss": 0.0298, + "step": 9832, + "task_loss": 0.18793505430221558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12192486971616745, + "epoch": 9.34, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.1285, + "step": 9833, + "task_loss": 0.18805371224880219 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01544223167002201, + "epoch": 9.34, + "learning_rate": 3.7490771151443525e-05, + "loss": 0.0146, + "step": 9834, + "task_loss": 0.006540972739458084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028887242078781128, + "epoch": 9.34, + "learning_rate": 3.748154003387831e-05, + "loss": 0.0273, + "step": 9835, + "task_loss": 0.01312171295285225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014016509056091309, + "epoch": 9.34, + "learning_rate": 3.7472306648981235e-05, + "loss": 0.0132, + "step": 9836, + "task_loss": 0.0060823168605566025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059632837772369385, + "epoch": 9.34, + "learning_rate": 3.746307099842959e-05, + "loss": 0.055, + "step": 9837, + "task_loss": 0.01375570334494114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008659922517836094, + "epoch": 9.34, + "learning_rate": 3.745383308390108e-05, + "loss": 0.0153, + "step": 9838, + "task_loss": 0.07544966042041779 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038622692227363586, + "epoch": 9.34, + "learning_rate": 3.74445929070738e-05, + "loss": 0.0406, + "step": 9839, + "task_loss": 0.05818815529346466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05248776823282242, + "epoch": 9.34, + "learning_rate": 3.74353504696263e-05, + "loss": 0.054, + "step": 9840, + "task_loss": 0.06735575944185257 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0463816337287426, + "epoch": 9.35, + "learning_rate": 3.742610577323749e-05, + "loss": 0.0579, + "step": 9841, + "task_loss": 0.16190406680107117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04839491844177246, + "epoch": 9.35, + "learning_rate": 3.7416858819586724e-05, + "loss": 0.0574, + "step": 9842, + "task_loss": 0.1388835310935974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0757160484790802, + "epoch": 9.35, + "learning_rate": 3.740760961035375e-05, + "loss": 0.0747, + "step": 9843, + "task_loss": 0.06535409390926361 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02673790417611599, + "epoch": 9.35, + "learning_rate": 3.739835814721874e-05, + "loss": 0.0248, + "step": 9844, + "task_loss": 0.007372252643108368 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018942799419164658, + "epoch": 9.35, + "learning_rate": 3.738910443186226e-05, + "loss": 0.0175, + "step": 9845, + "task_loss": 0.004348035901784897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020467443391680717, + "epoch": 9.35, + "learning_rate": 3.737984846596528e-05, + "loss": 0.0203, + "step": 9846, + "task_loss": 0.018603015691041946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020978864282369614, + "epoch": 9.35, + "learning_rate": 3.737059025120922e-05, + "loss": 0.02, + "step": 9847, + "task_loss": 0.01105569303035736 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06590712815523148, + "epoch": 9.35, + "learning_rate": 3.7361329789275855e-05, + "loss": 0.0767, + "step": 9848, + "task_loss": 0.1739407181739807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0286872461438179, + "epoch": 9.35, + "learning_rate": 3.7352067081847405e-05, + "loss": 0.0266, + "step": 9849, + "task_loss": 0.007965076714754105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08515746146440506, + "epoch": 9.35, + "learning_rate": 3.734280213060649e-05, + "loss": 0.0818, + "step": 9850, + "task_loss": 0.05148651823401451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02829767018556595, + "epoch": 9.36, + "learning_rate": 3.7333534937236105e-05, + "loss": 0.0399, + "step": 9851, + "task_loss": 0.14469148218631744 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0619497075676918, + "epoch": 9.36, + "learning_rate": 3.7324265503419716e-05, + "loss": 0.0694, + "step": 9852, + "task_loss": 0.13681992888450623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02087024413049221, + "epoch": 9.36, + "learning_rate": 3.731499383084114e-05, + "loss": 0.0201, + "step": 9853, + "task_loss": 0.012817522510886192 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05007238686084747, + "epoch": 9.36, + "learning_rate": 3.730571992118462e-05, + "loss": 0.0491, + "step": 9854, + "task_loss": 0.040530577301979065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01564808376133442, + "epoch": 9.36, + "learning_rate": 3.7296443776134814e-05, + "loss": 0.0155, + "step": 9855, + "task_loss": 0.013979200273752213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01755111664533615, + "epoch": 9.36, + "learning_rate": 3.7287165397376775e-05, + "loss": 0.0163, + "step": 9856, + "task_loss": 0.004699693992733955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09714722633361816, + "epoch": 9.36, + "learning_rate": 3.727788478659597e-05, + "loss": 0.106, + "step": 9857, + "task_loss": 0.18595141172409058 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0596044659614563, + "epoch": 9.36, + "learning_rate": 3.726860194547826e-05, + "loss": 0.0652, + "step": 9858, + "task_loss": 0.11533902585506439 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009165867231786251, + "epoch": 9.36, + "learning_rate": 3.725931687570992e-05, + "loss": 0.0224, + "step": 9859, + "task_loss": 0.1417846381664276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008022119291126728, + "epoch": 9.36, + "learning_rate": 3.7250029578977625e-05, + "loss": 0.0076, + "step": 9860, + "task_loss": 0.004271337762475014 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1454535573720932, + "epoch": 9.36, + "learning_rate": 3.724074005696847e-05, + "loss": 0.1585, + "step": 9861, + "task_loss": 0.2754858136177063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0446578785777092, + "epoch": 9.37, + "learning_rate": 3.723144831136992e-05, + "loss": 0.0508, + "step": 9862, + "task_loss": 0.10627266019582748 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009835630655288696, + "epoch": 9.37, + "learning_rate": 3.722215434386988e-05, + "loss": 0.0124, + "step": 9863, + "task_loss": 0.03548569977283478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07582224905490875, + "epoch": 9.37, + "learning_rate": 3.721285815615665e-05, + "loss": 0.088, + "step": 9864, + "task_loss": 0.19718605279922485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02364090085029602, + "epoch": 9.37, + "learning_rate": 3.7203559749918904e-05, + "loss": 0.0319, + "step": 9865, + "task_loss": 0.10592564195394516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06448841094970703, + "epoch": 9.37, + "learning_rate": 3.7194259126845764e-05, + "loss": 0.0599, + "step": 9866, + "task_loss": 0.018569234758615494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018728742375969887, + "epoch": 9.37, + "learning_rate": 3.7184956288626724e-05, + "loss": 0.0172, + "step": 9867, + "task_loss": 0.00367145799100399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0083607267588377, + "epoch": 9.37, + "learning_rate": 3.71756512369517e-05, + "loss": 0.008, + "step": 9868, + "task_loss": 0.004354575648903847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02431337907910347, + "epoch": 9.37, + "learning_rate": 3.716634397351097e-05, + "loss": 0.0261, + "step": 9869, + "task_loss": 0.04231826961040497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028572488576173782, + "epoch": 9.37, + "learning_rate": 3.715703449999528e-05, + "loss": 0.0353, + "step": 9870, + "task_loss": 0.09551827609539032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015134245157241821, + "epoch": 9.37, + "learning_rate": 3.7147722818095724e-05, + "loss": 0.0236, + "step": 9871, + "task_loss": 0.09958845376968384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01670273393392563, + "epoch": 9.38, + "learning_rate": 3.713840892950381e-05, + "loss": 0.0327, + "step": 9872, + "task_loss": 0.17647552490234375 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07219713926315308, + "epoch": 9.38, + "learning_rate": 3.712909283591145e-05, + "loss": 0.0823, + "step": 9873, + "task_loss": 0.17350198328495026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0580780953168869, + "epoch": 9.38, + "learning_rate": 3.7119774539010967e-05, + "loss": 0.0533, + "step": 9874, + "task_loss": 0.010585742071270943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03432053327560425, + "epoch": 9.38, + "learning_rate": 3.711045404049507e-05, + "loss": 0.0325, + "step": 9875, + "task_loss": 0.016367098316550255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014983313158154488, + "epoch": 9.38, + "learning_rate": 3.710113134205687e-05, + "loss": 0.0138, + "step": 9876, + "task_loss": 0.002722267061471939 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04396282881498337, + "epoch": 9.38, + "learning_rate": 3.709180644538988e-05, + "loss": 0.048, + "step": 9877, + "task_loss": 0.08390352874994278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014825180172920227, + "epoch": 9.38, + "learning_rate": 3.708247935218802e-05, + "loss": 0.0202, + "step": 9878, + "task_loss": 0.06829116493463516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1081385537981987, + "epoch": 9.38, + "learning_rate": 3.707315006414559e-05, + "loss": 0.1184, + "step": 9879, + "task_loss": 0.21026736497879028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06750968843698502, + "epoch": 9.38, + "learning_rate": 3.706381858295731e-05, + "loss": 0.0675, + "step": 9880, + "task_loss": 0.06727669388055801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02159801311790943, + "epoch": 9.38, + "learning_rate": 3.705448491031829e-05, + "loss": 0.0277, + "step": 9881, + "task_loss": 0.08293137699365616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07070346176624298, + "epoch": 9.38, + "learning_rate": 3.7045149047924016e-05, + "loss": 0.071, + "step": 9882, + "task_loss": 0.07330311834812164 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014572136104106903, + "epoch": 9.39, + "learning_rate": 3.703581099747041e-05, + "loss": 0.0231, + "step": 9883, + "task_loss": 0.09984834492206573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.055414482951164246, + "epoch": 9.39, + "learning_rate": 3.702647076065378e-05, + "loss": 0.0554, + "step": 9884, + "task_loss": 0.055326469242572784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025832070037722588, + "epoch": 9.39, + "learning_rate": 3.701712833917082e-05, + "loss": 0.035, + "step": 9885, + "task_loss": 0.1178750991821289 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013981124386191368, + "epoch": 9.39, + "learning_rate": 3.700778373471861e-05, + "loss": 0.0131, + "step": 9886, + "task_loss": 0.005626115947961807 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024125682190060616, + "epoch": 9.39, + "learning_rate": 3.699843694899467e-05, + "loss": 0.0283, + "step": 9887, + "task_loss": 0.06636115163564682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022186098620295525, + "epoch": 9.39, + "learning_rate": 3.698908798369686e-05, + "loss": 0.0206, + "step": 9888, + "task_loss": 0.006147833541035652 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09722729027271271, + "epoch": 9.39, + "learning_rate": 3.697973684052347e-05, + "loss": 0.1004, + "step": 9889, + "task_loss": 0.12891815602779388 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.006281149573624134, + "epoch": 9.39, + "learning_rate": 3.697038352117321e-05, + "loss": 0.0182, + "step": 9890, + "task_loss": 0.12592127919197083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014334081672132015, + "epoch": 9.39, + "learning_rate": 3.6961028027345114e-05, + "loss": 0.024, + "step": 9891, + "task_loss": 0.11108017712831497 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07208122313022614, + "epoch": 9.39, + "learning_rate": 3.695167036073868e-05, + "loss": 0.0769, + "step": 9892, + "task_loss": 0.12063595652580261 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04806242883205414, + "epoch": 9.4, + "learning_rate": 3.694231052305376e-05, + "loss": 0.0582, + "step": 9893, + "task_loss": 0.14957112073898315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022140424698591232, + "epoch": 9.4, + "learning_rate": 3.693294851599063e-05, + "loss": 0.0354, + "step": 9894, + "task_loss": 0.1551881581544876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017354466021060944, + "epoch": 9.4, + "learning_rate": 3.692358434124992e-05, + "loss": 0.021, + "step": 9895, + "task_loss": 0.05346723645925522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02076183259487152, + "epoch": 9.4, + "learning_rate": 3.69142180005327e-05, + "loss": 0.0196, + "step": 9896, + "task_loss": 0.009420402348041534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0411936454474926, + "epoch": 9.4, + "learning_rate": 3.69048494955404e-05, + "loss": 0.0466, + "step": 9897, + "task_loss": 0.09573011100292206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03919178247451782, + "epoch": 9.4, + "learning_rate": 3.689547882797485e-05, + "loss": 0.0358, + "step": 9898, + "task_loss": 0.005710486322641373 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0696905106306076, + "epoch": 9.4, + "learning_rate": 3.688610599953828e-05, + "loss": 0.0671, + "step": 9899, + "task_loss": 0.0438590869307518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07281279563903809, + "epoch": 9.4, + "learning_rate": 3.6876731011933316e-05, + "loss": 0.0817, + "step": 9900, + "task_loss": 0.1620171219110489 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016692060977220535, + "epoch": 9.4, + "learning_rate": 3.686735386686296e-05, + "loss": 0.0153, + "step": 9901, + "task_loss": 0.002423325553536415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03054118901491165, + "epoch": 9.4, + "learning_rate": 3.685797456603062e-05, + "loss": 0.0343, + "step": 9902, + "task_loss": 0.06791390478610992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013188479468226433, + "epoch": 9.4, + "learning_rate": 3.684859311114009e-05, + "loss": 0.0183, + "step": 9903, + "task_loss": 0.06404251605272293 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030764251947402954, + "epoch": 9.41, + "learning_rate": 3.6839209503895566e-05, + "loss": 0.0468, + "step": 9904, + "task_loss": 0.19135428965091705 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024435993283987045, + "epoch": 9.41, + "learning_rate": 3.6829823746001616e-05, + "loss": 0.03, + "step": 9905, + "task_loss": 0.08053772151470184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03574752062559128, + "epoch": 9.41, + "learning_rate": 3.6820435839163205e-05, + "loss": 0.04, + "step": 9906, + "task_loss": 0.07870674878358841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13980330526828766, + "epoch": 9.41, + "learning_rate": 3.68110457850857e-05, + "loss": 0.1499, + "step": 9907, + "task_loss": 0.24049611389636993 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0633138120174408, + "epoch": 9.41, + "learning_rate": 3.680165358547484e-05, + "loss": 0.0682, + "step": 9908, + "task_loss": 0.1119156926870346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03244736045598984, + "epoch": 9.41, + "learning_rate": 3.6792259242036776e-05, + "loss": 0.0359, + "step": 9909, + "task_loss": 0.06734733283519745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020193520933389664, + "epoch": 9.41, + "learning_rate": 3.678286275647802e-05, + "loss": 0.0188, + "step": 9910, + "task_loss": 0.006022298708558083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01735522225499153, + "epoch": 9.41, + "learning_rate": 3.677346413050551e-05, + "loss": 0.0171, + "step": 9911, + "task_loss": 0.014364780858159065 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03136299550533295, + "epoch": 9.41, + "learning_rate": 3.6764063365826525e-05, + "loss": 0.0348, + "step": 9912, + "task_loss": 0.06544046103954315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03162465989589691, + "epoch": 9.41, + "learning_rate": 3.675466046414878e-05, + "loss": 0.0339, + "step": 9913, + "task_loss": 0.05433046445250511 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028096985071897507, + "epoch": 9.42, + "learning_rate": 3.674525542718035e-05, + "loss": 0.0265, + "step": 9914, + "task_loss": 0.012334998697042465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10660208761692047, + "epoch": 9.42, + "learning_rate": 3.6735848256629705e-05, + "loss": 0.1089, + "step": 9915, + "task_loss": 0.12940451502799988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05087193101644516, + "epoch": 9.42, + "learning_rate": 3.6726438954205714e-05, + "loss": 0.06, + "step": 9916, + "task_loss": 0.14205744862556458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.035260528326034546, + "epoch": 9.42, + "learning_rate": 3.6717027521617595e-05, + "loss": 0.0326, + "step": 9917, + "task_loss": 0.009002592414617538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036381468176841736, + "epoch": 9.42, + "learning_rate": 3.6707613960575006e-05, + "loss": 0.0375, + "step": 9918, + "task_loss": 0.048002809286117554 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01758906990289688, + "epoch": 9.42, + "learning_rate": 3.669819827278795e-05, + "loss": 0.0164, + "step": 9919, + "task_loss": 0.005666827782988548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05025162547826767, + "epoch": 9.42, + "learning_rate": 3.668878045996685e-05, + "loss": 0.0592, + "step": 9920, + "task_loss": 0.13993430137634277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014506649225950241, + "epoch": 9.42, + "learning_rate": 3.667936052382248e-05, + "loss": 0.0134, + "step": 9921, + "task_loss": 0.0035849660634994507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0744752436876297, + "epoch": 9.42, + "learning_rate": 3.666993846606602e-05, + "loss": 0.0755, + "step": 9922, + "task_loss": 0.0845727026462555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037520673125982285, + "epoch": 9.42, + "learning_rate": 3.666051428840904e-05, + "loss": 0.0409, + "step": 9923, + "task_loss": 0.07118186354637146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007355409674346447, + "epoch": 9.42, + "learning_rate": 3.665108799256348e-05, + "loss": 0.007, + "step": 9924, + "task_loss": 0.003476981073617935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012398531660437584, + "epoch": 9.43, + "learning_rate": 3.6641659580241665e-05, + "loss": 0.0276, + "step": 9925, + "task_loss": 0.16440680623054504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10659578442573547, + "epoch": 9.43, + "learning_rate": 3.663222905315633e-05, + "loss": 0.1168, + "step": 9926, + "task_loss": 0.20838633179664612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06712566316127777, + "epoch": 9.43, + "learning_rate": 3.662279641302056e-05, + "loss": 0.076, + "step": 9927, + "task_loss": 0.15624761581420898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05237956717610359, + "epoch": 9.43, + "learning_rate": 3.6613361661547854e-05, + "loss": 0.052, + "step": 9928, + "task_loss": 0.04885098338127136 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05890313908457756, + "epoch": 9.43, + "learning_rate": 3.660392480045206e-05, + "loss": 0.0631, + "step": 9929, + "task_loss": 0.10126684606075287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020466038957238197, + "epoch": 9.43, + "learning_rate": 3.659448583144745e-05, + "loss": 0.0355, + "step": 9930, + "task_loss": 0.1712527871131897 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01883137971162796, + "epoch": 9.43, + "learning_rate": 3.658504475624865e-05, + "loss": 0.0175, + "step": 9931, + "task_loss": 0.005616925656795502 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019618559628725052, + "epoch": 9.43, + "learning_rate": 3.657560157657067e-05, + "loss": 0.0181, + "step": 9932, + "task_loss": 0.003942342475056648 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022781796753406525, + "epoch": 9.43, + "learning_rate": 3.656615629412892e-05, + "loss": 0.0382, + "step": 9933, + "task_loss": 0.17709828913211823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021544065326452255, + "epoch": 9.43, + "learning_rate": 3.655670891063917e-05, + "loss": 0.0356, + "step": 9934, + "task_loss": 0.1624692678451538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037996262311935425, + "epoch": 9.43, + "learning_rate": 3.6547259427817595e-05, + "loss": 0.0421, + "step": 9935, + "task_loss": 0.07946674525737762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06063329428434372, + "epoch": 9.44, + "learning_rate": 3.6537807847380726e-05, + "loss": 0.0579, + "step": 9936, + "task_loss": 0.03335261717438698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03863832727074623, + "epoch": 9.44, + "learning_rate": 3.65283541710455e-05, + "loss": 0.046, + "step": 9937, + "task_loss": 0.11236982047557831 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01804421842098236, + "epoch": 9.44, + "learning_rate": 3.6518898400529214e-05, + "loss": 0.0172, + "step": 9938, + "task_loss": 0.009426100179553032 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03564087301492691, + "epoch": 9.44, + "learning_rate": 3.650944053754956e-05, + "loss": 0.0408, + "step": 9939, + "task_loss": 0.08679479360580444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037948817014694214, + "epoch": 9.44, + "learning_rate": 3.6499980583824606e-05, + "loss": 0.0351, + "step": 9940, + "task_loss": 0.009799247607588768 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020474538207054138, + "epoch": 9.44, + "learning_rate": 3.64905185410728e-05, + "loss": 0.0258, + "step": 9941, + "task_loss": 0.07416295260190964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07244652509689331, + "epoch": 9.44, + "learning_rate": 3.6481054411012946e-05, + "loss": 0.0706, + "step": 9942, + "task_loss": 0.05382921174168587 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03393717110157013, + "epoch": 9.44, + "learning_rate": 3.647158819536427e-05, + "loss": 0.0355, + "step": 9943, + "task_loss": 0.04978133365511894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02722102776169777, + "epoch": 9.44, + "learning_rate": 3.646211989584635e-05, + "loss": 0.0334, + "step": 9944, + "task_loss": 0.08923061192035675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0162800382822752, + "epoch": 9.44, + "learning_rate": 3.645264951417915e-05, + "loss": 0.0222, + "step": 9945, + "task_loss": 0.0759543627500534 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012943320907652378, + "epoch": 9.45, + "learning_rate": 3.644317705208301e-05, + "loss": 0.0122, + "step": 9946, + "task_loss": 0.005832251161336899 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06934761255979538, + "epoch": 9.45, + "learning_rate": 3.643370251127865e-05, + "loss": 0.0795, + "step": 9947, + "task_loss": 0.17082899808883667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03814961761236191, + "epoch": 9.45, + "learning_rate": 3.6424225893487166e-05, + "loss": 0.0484, + "step": 9948, + "task_loss": 0.140329971909523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03574157506227493, + "epoch": 9.45, + "learning_rate": 3.641474720043002e-05, + "loss": 0.0353, + "step": 9949, + "task_loss": 0.03175017610192299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06168286129832268, + "epoch": 9.45, + "learning_rate": 3.6405266433829075e-05, + "loss": 0.0607, + "step": 9950, + "task_loss": 0.05187152698636055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.033880721777677536, + "epoch": 9.45, + "learning_rate": 3.639578359540655e-05, + "loss": 0.0376, + "step": 9951, + "task_loss": 0.07117892801761627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019045129418373108, + "epoch": 9.45, + "learning_rate": 3.638629868688506e-05, + "loss": 0.0257, + "step": 9952, + "task_loss": 0.08584122359752655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024328162893652916, + "epoch": 9.45, + "learning_rate": 3.6376811709987574e-05, + "loss": 0.0236, + "step": 9953, + "task_loss": 0.016677698120474815 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05967606231570244, + "epoch": 9.45, + "learning_rate": 3.636732266643745e-05, + "loss": 0.0675, + "step": 9954, + "task_loss": 0.13744193315505981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09701327234506607, + "epoch": 9.45, + "learning_rate": 3.635783155795841e-05, + "loss": 0.1019, + "step": 9955, + "task_loss": 0.14588984847068787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022994887083768845, + "epoch": 9.45, + "learning_rate": 3.634833838627458e-05, + "loss": 0.0332, + "step": 9956, + "task_loss": 0.12463461607694626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021844923496246338, + "epoch": 9.46, + "learning_rate": 3.6338843153110424e-05, + "loss": 0.0288, + "step": 9957, + "task_loss": 0.09174901992082596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04523449391126633, + "epoch": 9.46, + "learning_rate": 3.63293458601908e-05, + "loss": 0.0436, + "step": 9958, + "task_loss": 0.028626611456274986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015888435766100883, + "epoch": 9.46, + "learning_rate": 3.631984650924094e-05, + "loss": 0.0149, + "step": 9959, + "task_loss": 0.006228139623999596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04058516398072243, + "epoch": 9.46, + "learning_rate": 3.631034510198643e-05, + "loss": 0.0452, + "step": 9960, + "task_loss": 0.08676454424858093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028549985960125923, + "epoch": 9.46, + "learning_rate": 3.630084164015328e-05, + "loss": 0.0261, + "step": 9961, + "task_loss": 0.004079824313521385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05375894159078598, + "epoch": 9.46, + "learning_rate": 3.6291336125467814e-05, + "loss": 0.0524, + "step": 9962, + "task_loss": 0.040088847279548645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019988451153039932, + "epoch": 9.46, + "learning_rate": 3.628182855965676e-05, + "loss": 0.025, + "step": 9963, + "task_loss": 0.07049809396266937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01588454097509384, + "epoch": 9.46, + "learning_rate": 3.627231894444721e-05, + "loss": 0.0162, + "step": 9964, + "task_loss": 0.019161686301231384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07747796177864075, + "epoch": 9.46, + "learning_rate": 3.6262807281566634e-05, + "loss": 0.0743, + "step": 9965, + "task_loss": 0.04522999748587608 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03719233348965645, + "epoch": 9.46, + "learning_rate": 3.6253293572742884e-05, + "loss": 0.0341, + "step": 9966, + "task_loss": 0.006006931886076927 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022969527170062065, + "epoch": 9.47, + "learning_rate": 3.624377781970416e-05, + "loss": 0.0218, + "step": 9967, + "task_loss": 0.011378584429621696 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02225591614842415, + "epoch": 9.47, + "learning_rate": 3.6234260024179033e-05, + "loss": 0.0206, + "step": 9968, + "task_loss": 0.005647040903568268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015963654965162277, + "epoch": 9.47, + "learning_rate": 3.622474018789648e-05, + "loss": 0.0153, + "step": 9969, + "task_loss": 0.009000055491924286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012255464680492878, + "epoch": 9.47, + "learning_rate": 3.62152183125858e-05, + "loss": 0.0114, + "step": 9970, + "task_loss": 0.0038295499980449677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02565714716911316, + "epoch": 9.47, + "learning_rate": 3.620569439997671e-05, + "loss": 0.0497, + "step": 9971, + "task_loss": 0.2665744721889496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.033734291791915894, + "epoch": 9.47, + "learning_rate": 3.6196168451799266e-05, + "loss": 0.0376, + "step": 9972, + "task_loss": 0.07192590832710266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019625328481197357, + "epoch": 9.47, + "learning_rate": 3.618664046978389e-05, + "loss": 0.0229, + "step": 9973, + "task_loss": 0.052226584404706955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07318352907896042, + "epoch": 9.47, + "learning_rate": 3.617711045566141e-05, + "loss": 0.0663, + "step": 9974, + "task_loss": 0.004145057871937752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10104160010814667, + "epoch": 9.47, + "learning_rate": 3.616757841116298e-05, + "loss": 0.0914, + "step": 9975, + "task_loss": 0.0048540495336055756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0499558225274086, + "epoch": 9.47, + "learning_rate": 3.6158044338020155e-05, + "loss": 0.0594, + "step": 9976, + "task_loss": 0.1440429389476776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018780533224344254, + "epoch": 9.47, + "learning_rate": 3.614850823796483e-05, + "loss": 0.0174, + "step": 9977, + "task_loss": 0.0051146019250154495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021981973201036453, + "epoch": 9.48, + "learning_rate": 3.6138970112729296e-05, + "loss": 0.0208, + "step": 9978, + "task_loss": 0.010115953162312508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013006994500756264, + "epoch": 9.48, + "learning_rate": 3.612942996404619e-05, + "loss": 0.0122, + "step": 9979, + "task_loss": 0.0053765904158353806 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04389118403196335, + "epoch": 9.48, + "learning_rate": 3.611988779364853e-05, + "loss": 0.0429, + "step": 9980, + "task_loss": 0.034226249903440475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.062232695519924164, + "epoch": 9.48, + "learning_rate": 3.611034360326971e-05, + "loss": 0.0623, + "step": 9981, + "task_loss": 0.062431663274765015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08030220866203308, + "epoch": 9.48, + "learning_rate": 3.6100797394643455e-05, + "loss": 0.0799, + "step": 9982, + "task_loss": 0.07579746097326279 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09911078959703445, + "epoch": 9.48, + "learning_rate": 3.60912491695039e-05, + "loss": 0.0947, + "step": 9983, + "task_loss": 0.054534316062927246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018304534256458282, + "epoch": 9.48, + "learning_rate": 3.608169892958551e-05, + "loss": 0.0293, + "step": 9984, + "task_loss": 0.1278255134820938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0267410259693861, + "epoch": 9.48, + "learning_rate": 3.607214667662314e-05, + "loss": 0.0337, + "step": 9985, + "task_loss": 0.09607288241386414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012940805405378342, + "epoch": 9.48, + "learning_rate": 3.606259241235201e-05, + "loss": 0.0164, + "step": 9986, + "task_loss": 0.04748379439115524 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05529266595840454, + "epoch": 9.48, + "learning_rate": 3.605303613850768e-05, + "loss": 0.0547, + "step": 9987, + "task_loss": 0.048941053450107574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0491475947201252, + "epoch": 9.49, + "learning_rate": 3.604347785682611e-05, + "loss": 0.0475, + "step": 9988, + "task_loss": 0.03230959177017212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.042645324021577835, + "epoch": 9.49, + "learning_rate": 3.60339175690436e-05, + "loss": 0.0437, + "step": 9989, + "task_loss": 0.0532299280166626 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06176585331559181, + "epoch": 9.49, + "learning_rate": 3.602435527689683e-05, + "loss": 0.0598, + "step": 9990, + "task_loss": 0.04228825867176056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015652582049369812, + "epoch": 9.49, + "learning_rate": 3.6014790982122816e-05, + "loss": 0.024, + "step": 9991, + "task_loss": 0.09889158606529236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04341176524758339, + "epoch": 9.49, + "learning_rate": 3.6005224686458985e-05, + "loss": 0.0519, + "step": 9992, + "task_loss": 0.1284147948026657 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04502232372760773, + "epoch": 9.49, + "learning_rate": 3.599565639164308e-05, + "loss": 0.0422, + "step": 9993, + "task_loss": 0.01630707085132599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029705507680773735, + "epoch": 9.49, + "learning_rate": 3.5986086099413234e-05, + "loss": 0.0288, + "step": 9994, + "task_loss": 0.020898183807730675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03598994016647339, + "epoch": 9.49, + "learning_rate": 3.597651381150795e-05, + "loss": 0.034, + "step": 9995, + "task_loss": 0.015627581626176834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03647323325276375, + "epoch": 9.49, + "learning_rate": 3.5966939529666056e-05, + "loss": 0.0337, + "step": 9996, + "task_loss": 0.008715398609638214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022679002955555916, + "epoch": 9.49, + "learning_rate": 3.595736325562679e-05, + "loss": 0.0215, + "step": 9997, + "task_loss": 0.011273723095655441 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04891244322061539, + "epoch": 9.49, + "learning_rate": 3.5947784991129716e-05, + "loss": 0.0549, + "step": 9998, + "task_loss": 0.10830025374889374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023395076394081116, + "epoch": 9.5, + "learning_rate": 3.593820473791476e-05, + "loss": 0.0217, + "step": 9999, + "task_loss": 0.006120296195149422 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012700533494353294, + "epoch": 9.5, + "learning_rate": 3.5928622497722245e-05, + "loss": 0.0125, + "step": 10000, + "task_loss": 0.010491423308849335 + }, + { + "epoch": 9.5, + "eval_accuracy": 0.8944954128440367, + "eval_loss": 0.4492689073085785, + "eval_runtime": 18.1708, + "eval_samples_per_second": 47.989, + "eval_steps_per_second": 5.999, + "step": 10000 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07806367427110672, + "epoch": 9.5, + "learning_rate": 3.591903827229282e-05, + "loss": 0.0813, + "step": 10001, + "task_loss": 0.11088013648986816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012855404987931252, + "epoch": 9.5, + "learning_rate": 3.590945206336751e-05, + "loss": 0.0119, + "step": 10002, + "task_loss": 0.002988189458847046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06485391408205032, + "epoch": 9.5, + "learning_rate": 3.5899863872687675e-05, + "loss": 0.0683, + "step": 10003, + "task_loss": 0.09926420450210571 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02057585120201111, + "epoch": 9.5, + "learning_rate": 3.58902737019951e-05, + "loss": 0.0203, + "step": 10004, + "task_loss": 0.018218940123915672 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03241540864109993, + "epoch": 9.5, + "learning_rate": 3.5880681553031835e-05, + "loss": 0.0318, + "step": 10005, + "task_loss": 0.026616670191287994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01615779660642147, + "epoch": 9.5, + "learning_rate": 3.5871087427540375e-05, + "loss": 0.0153, + "step": 10006, + "task_loss": 0.007706159725785255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03969818726181984, + "epoch": 9.5, + "learning_rate": 3.586149132726353e-05, + "loss": 0.0375, + "step": 10007, + "task_loss": 0.01816389709711075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017227375879883766, + "epoch": 9.5, + "learning_rate": 3.585189325394447e-05, + "loss": 0.0161, + "step": 10008, + "task_loss": 0.006397966295480728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01556732039898634, + "epoch": 9.51, + "learning_rate": 3.5842293209326746e-05, + "loss": 0.0145, + "step": 10009, + "task_loss": 0.00453593023121357 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012093789875507355, + "epoch": 9.51, + "learning_rate": 3.583269119515423e-05, + "loss": 0.0182, + "step": 10010, + "task_loss": 0.0733606368303299 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029773391783237457, + "epoch": 9.51, + "learning_rate": 3.58230872131712e-05, + "loss": 0.0276, + "step": 10011, + "task_loss": 0.00802118144929409 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015580402687191963, + "epoch": 9.51, + "learning_rate": 3.581348126512225e-05, + "loss": 0.0147, + "step": 10012, + "task_loss": 0.007100388407707214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01875162497162819, + "epoch": 9.51, + "learning_rate": 3.5803873352752343e-05, + "loss": 0.0177, + "step": 10013, + "task_loss": 0.00872032716870308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07013194262981415, + "epoch": 9.51, + "learning_rate": 3.5794263477806816e-05, + "loss": 0.074, + "step": 10014, + "task_loss": 0.10908061265945435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023847414180636406, + "epoch": 9.51, + "learning_rate": 3.578465164203134e-05, + "loss": 0.0376, + "step": 10015, + "task_loss": 0.16116636991500854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010446256026625633, + "epoch": 9.51, + "learning_rate": 3.577503784717195e-05, + "loss": 0.0099, + "step": 10016, + "task_loss": 0.005132999271154404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.046971529722213745, + "epoch": 9.51, + "learning_rate": 3.576542209497505e-05, + "loss": 0.0496, + "step": 10017, + "task_loss": 0.0734739899635315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020745940506458282, + "epoch": 9.51, + "learning_rate": 3.575580438718738e-05, + "loss": 0.0244, + "step": 10018, + "task_loss": 0.057412195950746536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017170149832963943, + "epoch": 9.51, + "learning_rate": 3.574618472555604e-05, + "loss": 0.0273, + "step": 10019, + "task_loss": 0.11874374747276306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018801424652338028, + "epoch": 9.52, + "learning_rate": 3.573656311182848e-05, + "loss": 0.0281, + "step": 10020, + "task_loss": 0.11174070090055466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013433647342026234, + "epoch": 9.52, + "learning_rate": 3.5726939547752536e-05, + "loss": 0.0126, + "step": 10021, + "task_loss": 0.005174616351723671 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010112293064594269, + "epoch": 9.52, + "learning_rate": 3.5717314035076355e-05, + "loss": 0.0095, + "step": 10022, + "task_loss": 0.003835149109363556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041021257638931274, + "epoch": 9.52, + "learning_rate": 3.570768657554847e-05, + "loss": 0.0591, + "step": 10023, + "task_loss": 0.22215372323989868 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03894636034965515, + "epoch": 9.52, + "learning_rate": 3.569805717091775e-05, + "loss": 0.0423, + "step": 10024, + "task_loss": 0.07290509343147278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019539494067430496, + "epoch": 9.52, + "learning_rate": 3.5688425822933414e-05, + "loss": 0.0342, + "step": 10025, + "task_loss": 0.1657709777355194 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03588699549436569, + "epoch": 9.52, + "learning_rate": 3.5678792533345055e-05, + "loss": 0.0434, + "step": 10026, + "task_loss": 0.111113041639328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.058800891041755676, + "epoch": 9.52, + "learning_rate": 3.5669157303902604e-05, + "loss": 0.0656, + "step": 10027, + "task_loss": 0.12722201645374298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02307158336043358, + "epoch": 9.52, + "learning_rate": 3.565952013635635e-05, + "loss": 0.0256, + "step": 10028, + "task_loss": 0.04787431284785271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012087054550647736, + "epoch": 9.52, + "learning_rate": 3.564988103245692e-05, + "loss": 0.0137, + "step": 10029, + "task_loss": 0.028024591505527496 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014830454252660275, + "epoch": 9.53, + "learning_rate": 3.564023999395531e-05, + "loss": 0.0216, + "step": 10030, + "task_loss": 0.08278335630893707 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020527433604002, + "epoch": 9.53, + "learning_rate": 3.563059702260287e-05, + "loss": 0.0193, + "step": 10031, + "task_loss": 0.00791563093662262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08408955484628677, + "epoch": 9.53, + "learning_rate": 3.562095212015128e-05, + "loss": 0.0893, + "step": 10032, + "task_loss": 0.13649523258209229 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0584905743598938, + "epoch": 9.53, + "learning_rate": 3.5611305288352576e-05, + "loss": 0.0576, + "step": 10033, + "task_loss": 0.04989843815565109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09185069054365158, + "epoch": 9.53, + "learning_rate": 3.560165652895917e-05, + "loss": 0.0895, + "step": 10034, + "task_loss": 0.0679357573390007 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05496165156364441, + "epoch": 9.53, + "learning_rate": 3.5592005843723794e-05, + "loss": 0.06, + "step": 10035, + "task_loss": 0.10506260395050049 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02257445454597473, + "epoch": 9.53, + "learning_rate": 3.558235323439955e-05, + "loss": 0.0208, + "step": 10036, + "task_loss": 0.004368903115391731 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03320860490202904, + "epoch": 9.53, + "learning_rate": 3.557269870273987e-05, + "loss": 0.0343, + "step": 10037, + "task_loss": 0.04424959793686867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03928327187895775, + "epoch": 9.53, + "learning_rate": 3.5563042250498556e-05, + "loss": 0.0388, + "step": 10038, + "task_loss": 0.034501951187849045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06444701552391052, + "epoch": 9.53, + "learning_rate": 3.555338387942974e-05, + "loss": 0.0616, + "step": 10039, + "task_loss": 0.03561048209667206 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05409969761967659, + "epoch": 9.53, + "learning_rate": 3.5543723591287916e-05, + "loss": 0.0558, + "step": 10040, + "task_loss": 0.07060417532920837 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09123320877552032, + "epoch": 9.54, + "learning_rate": 3.5534061387827936e-05, + "loss": 0.1023, + "step": 10041, + "task_loss": 0.202234148979187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014315794222056866, + "epoch": 9.54, + "learning_rate": 3.552439727080495e-05, + "loss": 0.0223, + "step": 10042, + "task_loss": 0.09408995509147644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01862926036119461, + "epoch": 9.54, + "learning_rate": 3.5514731241974544e-05, + "loss": 0.0183, + "step": 10043, + "task_loss": 0.015779605135321617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026581861078739166, + "epoch": 9.54, + "learning_rate": 3.5505063303092545e-05, + "loss": 0.0252, + "step": 10044, + "task_loss": 0.012902749702334404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020298294723033905, + "epoch": 9.54, + "learning_rate": 3.549539345591521e-05, + "loss": 0.0195, + "step": 10045, + "task_loss": 0.012040220201015472 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09082924574613571, + "epoch": 9.54, + "learning_rate": 3.5485721702199104e-05, + "loss": 0.0905, + "step": 10046, + "task_loss": 0.0875604897737503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04488201439380646, + "epoch": 9.54, + "learning_rate": 3.547604804370116e-05, + "loss": 0.0489, + "step": 10047, + "task_loss": 0.08512827754020691 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024493440985679626, + "epoch": 9.54, + "learning_rate": 3.5466372482178635e-05, + "loss": 0.0231, + "step": 10048, + "task_loss": 0.010883405804634094 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03374480828642845, + "epoch": 9.54, + "learning_rate": 3.545669501938913e-05, + "loss": 0.031, + "step": 10049, + "task_loss": 0.005990535020828247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022520992904901505, + "epoch": 9.54, + "learning_rate": 3.544701565709063e-05, + "loss": 0.0242, + "step": 10050, + "task_loss": 0.03893338143825531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00832436978816986, + "epoch": 9.55, + "learning_rate": 3.54373343970414e-05, + "loss": 0.0157, + "step": 10051, + "task_loss": 0.08178244531154633 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0154949389398098, + "epoch": 9.55, + "learning_rate": 3.542765124100014e-05, + "loss": 0.0143, + "step": 10052, + "task_loss": 0.0035653971135616302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01720263808965683, + "epoch": 9.55, + "learning_rate": 3.541796619072579e-05, + "loss": 0.0175, + "step": 10053, + "task_loss": 0.020343858748674393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12314651161432266, + "epoch": 9.55, + "learning_rate": 3.540827924797772e-05, + "loss": 0.1117, + "step": 10054, + "task_loss": 0.00850660353899002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011486424133181572, + "epoch": 9.55, + "learning_rate": 3.5398590414515586e-05, + "loss": 0.0106, + "step": 10055, + "task_loss": 0.002300182357430458 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.006632357835769653, + "epoch": 9.55, + "learning_rate": 3.5388899692099433e-05, + "loss": 0.0235, + "step": 10056, + "task_loss": 0.17546282708644867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015405992977321148, + "epoch": 9.55, + "learning_rate": 3.537920708248961e-05, + "loss": 0.0295, + "step": 10057, + "task_loss": 0.15601424872875214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06356394290924072, + "epoch": 9.55, + "learning_rate": 3.536951258744684e-05, + "loss": 0.0721, + "step": 10058, + "task_loss": 0.1491929441690445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012521881610155106, + "epoch": 9.55, + "learning_rate": 3.5359816208732164e-05, + "loss": 0.0118, + "step": 10059, + "task_loss": 0.005132080987095833 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022264419123530388, + "epoch": 9.55, + "learning_rate": 3.535011794810698e-05, + "loss": 0.0216, + "step": 10060, + "task_loss": 0.015442634001374245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022136827930808067, + "epoch": 9.55, + "learning_rate": 3.5340417807333026e-05, + "loss": 0.0227, + "step": 10061, + "task_loss": 0.02739689312875271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013634988106787205, + "epoch": 9.56, + "learning_rate": 3.533071578817239e-05, + "loss": 0.0204, + "step": 10062, + "task_loss": 0.08093855530023575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012208834290504456, + "epoch": 9.56, + "learning_rate": 3.5321011892387455e-05, + "loss": 0.0285, + "step": 10063, + "task_loss": 0.1753706932067871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07617907971143723, + "epoch": 9.56, + "learning_rate": 3.5311306121741015e-05, + "loss": 0.0732, + "step": 10064, + "task_loss": 0.046028852462768555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010005553252995014, + "epoch": 9.56, + "learning_rate": 3.530159847799616e-05, + "loss": 0.0097, + "step": 10065, + "task_loss": 0.0069500841200351715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0498456135392189, + "epoch": 9.56, + "learning_rate": 3.529188896291632e-05, + "loss": 0.0624, + "step": 10066, + "task_loss": 0.17580851912498474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01079606544226408, + "epoch": 9.56, + "learning_rate": 3.5282177578265296e-05, + "loss": 0.01, + "step": 10067, + "task_loss": 0.003003546968102455 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06311812996864319, + "epoch": 9.56, + "learning_rate": 3.527246432580718e-05, + "loss": 0.0622, + "step": 10068, + "task_loss": 0.05363881587982178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020096108317375183, + "epoch": 9.56, + "learning_rate": 3.526274920730645e-05, + "loss": 0.0185, + "step": 10069, + "task_loss": 0.004027027636766434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016499292105436325, + "epoch": 9.56, + "learning_rate": 3.525303222452791e-05, + "loss": 0.0156, + "step": 10070, + "task_loss": 0.007455941289663315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014504550956189632, + "epoch": 9.56, + "learning_rate": 3.5243313379236685e-05, + "loss": 0.0182, + "step": 10071, + "task_loss": 0.051125288009643555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028046708554029465, + "epoch": 9.57, + "learning_rate": 3.5233592673198245e-05, + "loss": 0.037, + "step": 10072, + "task_loss": 0.11712638288736343 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.048199012875556946, + "epoch": 9.57, + "learning_rate": 3.522387010817842e-05, + "loss": 0.0448, + "step": 10073, + "task_loss": 0.013714082539081573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.12250658124685287, + "epoch": 9.57, + "learning_rate": 3.521414568594335e-05, + "loss": 0.1211, + "step": 10074, + "task_loss": 0.10803371667861938 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028487298637628555, + "epoch": 9.57, + "learning_rate": 3.520441940825952e-05, + "loss": 0.0274, + "step": 10075, + "task_loss": 0.017971640452742577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01495126262307167, + "epoch": 9.57, + "learning_rate": 3.5194691276893755e-05, + "loss": 0.0234, + "step": 10076, + "task_loss": 0.09949477761983871 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018718678504228592, + "epoch": 9.57, + "learning_rate": 3.518496129361323e-05, + "loss": 0.0328, + "step": 10077, + "task_loss": 0.1593606173992157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0187015850096941, + "epoch": 9.57, + "learning_rate": 3.5175229460185425e-05, + "loss": 0.0173, + "step": 10078, + "task_loss": 0.004929153248667717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02536281757056713, + "epoch": 9.57, + "learning_rate": 3.5165495778378196e-05, + "loss": 0.0232, + "step": 10079, + "task_loss": 0.00400426983833313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0379653237760067, + "epoch": 9.57, + "learning_rate": 3.51557602499597e-05, + "loss": 0.0348, + "step": 10080, + "task_loss": 0.006340883672237396 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047561101615428925, + "epoch": 9.57, + "learning_rate": 3.514602287669844e-05, + "loss": 0.0578, + "step": 10081, + "task_loss": 0.15020343661308289 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017001332715153694, + "epoch": 9.57, + "learning_rate": 3.5136283660363255e-05, + "loss": 0.0169, + "step": 10082, + "task_loss": 0.015564844012260437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025101391598582268, + "epoch": 9.58, + "learning_rate": 3.5126542602723334e-05, + "loss": 0.0319, + "step": 10083, + "task_loss": 0.09269939363002777 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.049932364374399185, + "epoch": 9.58, + "learning_rate": 3.5116799705548175e-05, + "loss": 0.0726, + "step": 10084, + "task_loss": 0.2766878008842468 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01646530255675316, + "epoch": 9.58, + "learning_rate": 3.510705497060762e-05, + "loss": 0.0153, + "step": 10085, + "task_loss": 0.004672164097428322 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031244024634361267, + "epoch": 9.58, + "learning_rate": 3.509730839967187e-05, + "loss": 0.0304, + "step": 10086, + "task_loss": 0.022954711690545082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0413852296769619, + "epoch": 9.58, + "learning_rate": 3.508755999451141e-05, + "loss": 0.0503, + "step": 10087, + "task_loss": 0.13052834570407867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024242881685495377, + "epoch": 9.58, + "learning_rate": 3.50778097568971e-05, + "loss": 0.0226, + "step": 10088, + "task_loss": 0.008248705416917801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014527438208460808, + "epoch": 9.58, + "learning_rate": 3.506805768860011e-05, + "loss": 0.0136, + "step": 10089, + "task_loss": 0.005055870860815048 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043073639273643494, + "epoch": 9.58, + "learning_rate": 3.505830379139195e-05, + "loss": 0.0481, + "step": 10090, + "task_loss": 0.09338083863258362 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018400005996227264, + "epoch": 9.58, + "learning_rate": 3.504854806704446e-05, + "loss": 0.017, + "step": 10091, + "task_loss": 0.004409752786159515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03231433779001236, + "epoch": 9.58, + "learning_rate": 3.503879051732983e-05, + "loss": 0.0422, + "step": 10092, + "task_loss": 0.1311528980731964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04105234518647194, + "epoch": 9.58, + "learning_rate": 3.502903114402055e-05, + "loss": 0.0461, + "step": 10093, + "task_loss": 0.09118282049894333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04761913791298866, + "epoch": 9.59, + "learning_rate": 3.501926994888946e-05, + "loss": 0.0462, + "step": 10094, + "task_loss": 0.0329703614115715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013278797268867493, + "epoch": 9.59, + "learning_rate": 3.500950693370974e-05, + "loss": 0.0124, + "step": 10095, + "task_loss": 0.004137454554438591 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021566402167081833, + "epoch": 9.59, + "learning_rate": 3.499974210025487e-05, + "loss": 0.0255, + "step": 10096, + "task_loss": 0.06101330369710922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03355410322546959, + "epoch": 9.59, + "learning_rate": 3.4989975450298694e-05, + "loss": 0.0436, + "step": 10097, + "task_loss": 0.13387075066566467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011760689318180084, + "epoch": 9.59, + "learning_rate": 3.498020698561536e-05, + "loss": 0.0109, + "step": 10098, + "task_loss": 0.0035722479224205017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.040274716913700104, + "epoch": 9.59, + "learning_rate": 3.497043670797936e-05, + "loss": 0.0377, + "step": 10099, + "task_loss": 0.014515921473503113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019877292215824127, + "epoch": 9.59, + "learning_rate": 3.496066461916552e-05, + "loss": 0.0187, + "step": 10100, + "task_loss": 0.0076863933354616165 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05636334791779518, + "epoch": 9.59, + "learning_rate": 3.495089072094898e-05, + "loss": 0.0547, + "step": 10101, + "task_loss": 0.040151506662368774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02113344892859459, + "epoch": 9.59, + "learning_rate": 3.494111501510522e-05, + "loss": 0.0195, + "step": 10102, + "task_loss": 0.0052163973450660706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015629485249519348, + "epoch": 9.59, + "learning_rate": 3.4931337503410034e-05, + "loss": 0.0238, + "step": 10103, + "task_loss": 0.09734653681516647 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013077488169074059, + "epoch": 9.6, + "learning_rate": 3.4921558187639556e-05, + "loss": 0.0125, + "step": 10104, + "task_loss": 0.006894918158650398 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014943759888410568, + "epoch": 9.6, + "learning_rate": 3.491177706957026e-05, + "loss": 0.0193, + "step": 10105, + "task_loss": 0.058562587946653366 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05631723254919052, + "epoch": 9.6, + "learning_rate": 3.490199415097892e-05, + "loss": 0.0563, + "step": 10106, + "task_loss": 0.056095875799655914 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017733752727508545, + "epoch": 9.6, + "learning_rate": 3.489220943364266e-05, + "loss": 0.0165, + "step": 10107, + "task_loss": 0.00526992604136467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10405229032039642, + "epoch": 9.6, + "learning_rate": 3.488242291933891e-05, + "loss": 0.1191, + "step": 10108, + "task_loss": 0.254643976688385 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008884737268090248, + "epoch": 9.6, + "learning_rate": 3.487263460984544e-05, + "loss": 0.0083, + "step": 10109, + "task_loss": 0.0033239684998989105 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00749258091673255, + "epoch": 9.6, + "learning_rate": 3.486284450694035e-05, + "loss": 0.007, + "step": 10110, + "task_loss": 0.0027102380990982056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.046791236847639084, + "epoch": 9.6, + "learning_rate": 3.485305261240205e-05, + "loss": 0.0484, + "step": 10111, + "task_loss": 0.06307273358106613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026928380131721497, + "epoch": 9.6, + "learning_rate": 3.4843258928009294e-05, + "loss": 0.0259, + "step": 10112, + "task_loss": 0.01632116362452507 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030285503715276718, + "epoch": 9.6, + "learning_rate": 3.4833463455541146e-05, + "loss": 0.0344, + "step": 10113, + "task_loss": 0.07147219032049179 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027739666402339935, + "epoch": 9.6, + "learning_rate": 3.4823666196777006e-05, + "loss": 0.026, + "step": 10114, + "task_loss": 0.01077285036444664 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1198190227150917, + "epoch": 9.61, + "learning_rate": 3.481386715349659e-05, + "loss": 0.119, + "step": 10115, + "task_loss": 0.11184495687484741 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02876965142786503, + "epoch": 9.61, + "learning_rate": 3.4804066327479934e-05, + "loss": 0.0307, + "step": 10116, + "task_loss": 0.04767701029777527 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06963972747325897, + "epoch": 9.61, + "learning_rate": 3.4794263720507427e-05, + "loss": 0.0748, + "step": 10117, + "task_loss": 0.12130016088485718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10158136487007141, + "epoch": 9.61, + "learning_rate": 3.478445933435973e-05, + "loss": 0.0977, + "step": 10118, + "task_loss": 0.06303709745407104 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06906269490718842, + "epoch": 9.61, + "learning_rate": 3.4774653170817884e-05, + "loss": 0.0782, + "step": 10119, + "task_loss": 0.1606631875038147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020416613668203354, + "epoch": 9.61, + "learning_rate": 3.4764845231663205e-05, + "loss": 0.0258, + "step": 10120, + "task_loss": 0.07442939281463623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014320943504571915, + "epoch": 9.61, + "learning_rate": 3.475503551867737e-05, + "loss": 0.0134, + "step": 10121, + "task_loss": 0.004721490666270256 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04403253272175789, + "epoch": 9.61, + "learning_rate": 3.474522403364235e-05, + "loss": 0.0434, + "step": 10122, + "task_loss": 0.03757096827030182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009927413426339626, + "epoch": 9.61, + "learning_rate": 3.473541077834045e-05, + "loss": 0.0096, + "step": 10123, + "task_loss": 0.0061761606484651566 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.046677879989147186, + "epoch": 9.61, + "learning_rate": 3.4725595754554295e-05, + "loss": 0.0439, + "step": 10124, + "task_loss": 0.019169652834534645 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01028907299041748, + "epoch": 9.62, + "learning_rate": 3.471577896406683e-05, + "loss": 0.0096, + "step": 10125, + "task_loss": 0.0030922014266252518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022475387901067734, + "epoch": 9.62, + "learning_rate": 3.470596040866133e-05, + "loss": 0.0296, + "step": 10126, + "task_loss": 0.09410066902637482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010681994259357452, + "epoch": 9.62, + "learning_rate": 3.4696140090121376e-05, + "loss": 0.0101, + "step": 10127, + "task_loss": 0.004838347434997559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011722270399332047, + "epoch": 9.62, + "learning_rate": 3.468631801023088e-05, + "loss": 0.0287, + "step": 10128, + "task_loss": 0.18153433501720428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022453030571341515, + "epoch": 9.62, + "learning_rate": 3.467649417077406e-05, + "loss": 0.0258, + "step": 10129, + "task_loss": 0.05557314306497574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08180783689022064, + "epoch": 9.62, + "learning_rate": 3.466666857353547e-05, + "loss": 0.0942, + "step": 10130, + "task_loss": 0.20586249232292175 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017414044588804245, + "epoch": 9.62, + "learning_rate": 3.465684122029999e-05, + "loss": 0.0161, + "step": 10131, + "task_loss": 0.004198454320430756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.23380213975906372, + "epoch": 9.62, + "learning_rate": 3.464701211285279e-05, + "loss": 0.2391, + "step": 10132, + "task_loss": 0.2872486114501953 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013667404651641846, + "epoch": 9.62, + "learning_rate": 3.463718125297937e-05, + "loss": 0.0128, + "step": 10133, + "task_loss": 0.004925645887851715 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05431966111063957, + "epoch": 9.62, + "learning_rate": 3.462734864246557e-05, + "loss": 0.0529, + "step": 10134, + "task_loss": 0.04007718339562416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030022673308849335, + "epoch": 9.62, + "learning_rate": 3.4617514283097524e-05, + "loss": 0.0402, + "step": 10135, + "task_loss": 0.13180501759052277 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019991595298051834, + "epoch": 9.63, + "learning_rate": 3.4607678176661695e-05, + "loss": 0.0377, + "step": 10136, + "task_loss": 0.19743065536022186 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04344207048416138, + "epoch": 9.63, + "learning_rate": 3.459784032494484e-05, + "loss": 0.0485, + "step": 10137, + "task_loss": 0.09402826428413391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007493377663195133, + "epoch": 9.63, + "learning_rate": 3.458800072973408e-05, + "loss": 0.0071, + "step": 10138, + "task_loss": 0.0033276528120040894 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08084356784820557, + "epoch": 9.63, + "learning_rate": 3.45781593928168e-05, + "loss": 0.0823, + "step": 10139, + "task_loss": 0.09574754536151886 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01496205572038889, + "epoch": 9.63, + "learning_rate": 3.4568316315980745e-05, + "loss": 0.0204, + "step": 10140, + "task_loss": 0.06947852671146393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024571670219302177, + "epoch": 9.63, + "learning_rate": 3.455847150101395e-05, + "loss": 0.0364, + "step": 10141, + "task_loss": 0.14293412864208221 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03470940142869949, + "epoch": 9.63, + "learning_rate": 3.454862494970477e-05, + "loss": 0.0461, + "step": 10142, + "task_loss": 0.1487175077199936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014040854759514332, + "epoch": 9.63, + "learning_rate": 3.4538776663841875e-05, + "loss": 0.0133, + "step": 10143, + "task_loss": 0.006179919466376305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02388843148946762, + "epoch": 9.63, + "learning_rate": 3.452892664521427e-05, + "loss": 0.0248, + "step": 10144, + "task_loss": 0.032703425735235214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007051503751426935, + "epoch": 9.63, + "learning_rate": 3.4519074895611244e-05, + "loss": 0.0067, + "step": 10145, + "task_loss": 0.004012970253825188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013478701002895832, + "epoch": 9.64, + "learning_rate": 3.4509221416822415e-05, + "loss": 0.0126, + "step": 10146, + "task_loss": 0.004567300900816917 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044648945331573486, + "epoch": 9.64, + "learning_rate": 3.4499366210637725e-05, + "loss": 0.0537, + "step": 10147, + "task_loss": 0.13514164090156555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017728157341480255, + "epoch": 9.64, + "learning_rate": 3.4489509278847414e-05, + "loss": 0.024, + "step": 10148, + "task_loss": 0.08038611710071564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0526009202003479, + "epoch": 9.64, + "learning_rate": 3.4479650623242036e-05, + "loss": 0.0529, + "step": 10149, + "task_loss": 0.05552627891302109 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031424228101968765, + "epoch": 9.64, + "learning_rate": 3.446979024561246e-05, + "loss": 0.0368, + "step": 10150, + "task_loss": 0.0853959321975708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08394592255353928, + "epoch": 9.64, + "learning_rate": 3.44599281477499e-05, + "loss": 0.08, + "step": 10151, + "task_loss": 0.044918350875377655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06996019929647446, + "epoch": 9.64, + "learning_rate": 3.4450064331445814e-05, + "loss": 0.0865, + "step": 10152, + "task_loss": 0.23547132313251495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.104726642370224, + "epoch": 9.64, + "learning_rate": 3.444019879849204e-05, + "loss": 0.1095, + "step": 10153, + "task_loss": 0.15200763940811157 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06347490102052689, + "epoch": 9.64, + "learning_rate": 3.443033155068069e-05, + "loss": 0.0633, + "step": 10154, + "task_loss": 0.06222536787390709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10413339734077454, + "epoch": 9.64, + "learning_rate": 3.442046258980419e-05, + "loss": 0.1025, + "step": 10155, + "task_loss": 0.08788105845451355 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04322014003992081, + "epoch": 9.64, + "learning_rate": 3.4410591917655296e-05, + "loss": 0.0468, + "step": 10156, + "task_loss": 0.07900369167327881 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051760174334049225, + "epoch": 9.65, + "learning_rate": 3.4400719536027056e-05, + "loss": 0.0559, + "step": 10157, + "task_loss": 0.09334629774093628 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020617887377738953, + "epoch": 9.65, + "learning_rate": 3.4390845446712836e-05, + "loss": 0.0218, + "step": 10158, + "task_loss": 0.032847531139850616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.040341269224882126, + "epoch": 9.65, + "learning_rate": 3.438096965150632e-05, + "loss": 0.0382, + "step": 10159, + "task_loss": 0.018778573721647263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01803889125585556, + "epoch": 9.65, + "learning_rate": 3.4371092152201485e-05, + "loss": 0.0195, + "step": 10160, + "task_loss": 0.03258706256747246 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05206110328435898, + "epoch": 9.65, + "learning_rate": 3.4361212950592624e-05, + "loss": 0.0492, + "step": 10161, + "task_loss": 0.023629793897271156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011896253563463688, + "epoch": 9.65, + "learning_rate": 3.435133204847435e-05, + "loss": 0.0111, + "step": 10162, + "task_loss": 0.00429350882768631 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019481834024190903, + "epoch": 9.65, + "learning_rate": 3.4341449447641575e-05, + "loss": 0.0182, + "step": 10163, + "task_loss": 0.006555115804076195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044923312962055206, + "epoch": 9.65, + "learning_rate": 3.433156514988951e-05, + "loss": 0.0486, + "step": 10164, + "task_loss": 0.0811932161450386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026373110711574554, + "epoch": 9.65, + "learning_rate": 3.432167915701371e-05, + "loss": 0.0293, + "step": 10165, + "task_loss": 0.05567855015397072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015387404710054398, + "epoch": 9.65, + "learning_rate": 3.431179147080999e-05, + "loss": 0.0244, + "step": 10166, + "task_loss": 0.10537419468164444 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05314071476459503, + "epoch": 9.66, + "learning_rate": 3.4301902093074504e-05, + "loss": 0.0508, + "step": 10167, + "task_loss": 0.030011305585503578 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020540203899145126, + "epoch": 9.66, + "learning_rate": 3.42920110256037e-05, + "loss": 0.0278, + "step": 10168, + "task_loss": 0.09346860647201538 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05021016672253609, + "epoch": 9.66, + "learning_rate": 3.428211827019434e-05, + "loss": 0.0555, + "step": 10169, + "task_loss": 0.10355982184410095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00913775060325861, + "epoch": 9.66, + "learning_rate": 3.42722238286435e-05, + "loss": 0.0171, + "step": 10170, + "task_loss": 0.08899959921836853 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03822232410311699, + "epoch": 9.66, + "learning_rate": 3.426232770274855e-05, + "loss": 0.04, + "step": 10171, + "task_loss": 0.05588465929031372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010778695344924927, + "epoch": 9.66, + "learning_rate": 3.4252429894307154e-05, + "loss": 0.0185, + "step": 10172, + "task_loss": 0.08806253969669342 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025916989892721176, + "epoch": 9.66, + "learning_rate": 3.424253040511731e-05, + "loss": 0.0386, + "step": 10173, + "task_loss": 0.1526973396539688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015541073866188526, + "epoch": 9.66, + "learning_rate": 3.4232629236977316e-05, + "loss": 0.0146, + "step": 10174, + "task_loss": 0.006028560921549797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0131698502227664, + "epoch": 9.66, + "learning_rate": 3.4222726391685746e-05, + "loss": 0.0269, + "step": 10175, + "task_loss": 0.15060535073280334 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01671590656042099, + "epoch": 9.66, + "learning_rate": 3.42128218710415e-05, + "loss": 0.019, + "step": 10176, + "task_loss": 0.039641670882701874 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009115570224821568, + "epoch": 9.66, + "learning_rate": 3.420291567684381e-05, + "loss": 0.0088, + "step": 10177, + "task_loss": 0.00632680207490921 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08051498234272003, + "epoch": 9.67, + "learning_rate": 3.419300781089216e-05, + "loss": 0.0786, + "step": 10178, + "task_loss": 0.061485860496759415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03897204250097275, + "epoch": 9.67, + "learning_rate": 3.418309827498637e-05, + "loss": 0.0359, + "step": 10179, + "task_loss": 0.008243357762694359 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011700714007019997, + "epoch": 9.67, + "learning_rate": 3.4173187070926546e-05, + "loss": 0.0111, + "step": 10180, + "task_loss": 0.005291668698191643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012526036240160465, + "epoch": 9.67, + "learning_rate": 3.4163274200513116e-05, + "loss": 0.0186, + "step": 10181, + "task_loss": 0.07343530654907227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017924156039953232, + "epoch": 9.67, + "learning_rate": 3.415335966554679e-05, + "loss": 0.0179, + "step": 10182, + "task_loss": 0.017576100304722786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06711853295564651, + "epoch": 9.67, + "learning_rate": 3.414344346782861e-05, + "loss": 0.0728, + "step": 10183, + "task_loss": 0.12400171160697937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017617417499423027, + "epoch": 9.67, + "learning_rate": 3.413352560915988e-05, + "loss": 0.0164, + "step": 10184, + "task_loss": 0.005933165550231934 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04335098713636398, + "epoch": 9.67, + "learning_rate": 3.412360609134223e-05, + "loss": 0.0492, + "step": 10185, + "task_loss": 0.10230584442615509 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01754794456064701, + "epoch": 9.67, + "learning_rate": 3.411368491617761e-05, + "loss": 0.0165, + "step": 10186, + "task_loss": 0.006676128134131432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08341458439826965, + "epoch": 9.67, + "learning_rate": 3.410376208546822e-05, + "loss": 0.0941, + "step": 10187, + "task_loss": 0.19037111103534698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012632082216441631, + "epoch": 9.68, + "learning_rate": 3.409383760101661e-05, + "loss": 0.0321, + "step": 10188, + "task_loss": 0.20703136920928955 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05429381877183914, + "epoch": 9.68, + "learning_rate": 3.4083911464625596e-05, + "loss": 0.0664, + "step": 10189, + "task_loss": 0.17530539631843567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01827721670269966, + "epoch": 9.68, + "learning_rate": 3.407398367809832e-05, + "loss": 0.0346, + "step": 10190, + "task_loss": 0.18168266117572784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015146693214774132, + "epoch": 9.68, + "learning_rate": 3.406405424323821e-05, + "loss": 0.0274, + "step": 10191, + "task_loss": 0.13719764351844788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05147261545062065, + "epoch": 9.68, + "learning_rate": 3.4054123161848995e-05, + "loss": 0.059, + "step": 10192, + "task_loss": 0.12712138891220093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024947544559836388, + "epoch": 9.68, + "learning_rate": 3.4044190435734695e-05, + "loss": 0.0233, + "step": 10193, + "task_loss": 0.008586343377828598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010366448201239109, + "epoch": 9.68, + "learning_rate": 3.403425606669965e-05, + "loss": 0.0099, + "step": 10194, + "task_loss": 0.005786292254924774 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08639805763959885, + "epoch": 9.68, + "learning_rate": 3.4024320056548475e-05, + "loss": 0.0934, + "step": 10195, + "task_loss": 0.15593896806240082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04102171212434769, + "epoch": 9.68, + "learning_rate": 3.401438240708611e-05, + "loss": 0.0455, + "step": 10196, + "task_loss": 0.08535470068454742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02099025435745716, + "epoch": 9.68, + "learning_rate": 3.400444312011776e-05, + "loss": 0.0239, + "step": 10197, + "task_loss": 0.049628641456365585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07851079851388931, + "epoch": 9.68, + "learning_rate": 3.399450219744896e-05, + "loss": 0.0751, + "step": 10198, + "task_loss": 0.044152356684207916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039639078080654144, + "epoch": 9.69, + "learning_rate": 3.3984559640885505e-05, + "loss": 0.0609, + "step": 10199, + "task_loss": 0.2518640458583832 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03628784418106079, + "epoch": 9.69, + "learning_rate": 3.3974615452233526e-05, + "loss": 0.0336, + "step": 10200, + "task_loss": 0.009241720661520958 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07165795564651489, + "epoch": 9.69, + "learning_rate": 3.396466963329944e-05, + "loss": 0.071, + "step": 10201, + "task_loss": 0.06518401205539703 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026513490825891495, + "epoch": 9.69, + "learning_rate": 3.395472218588992e-05, + "loss": 0.0297, + "step": 10202, + "task_loss": 0.05858299508690834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023471450433135033, + "epoch": 9.69, + "learning_rate": 3.394477311181201e-05, + "loss": 0.0217, + "step": 10203, + "task_loss": 0.005670515820384026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08482164889574051, + "epoch": 9.69, + "learning_rate": 3.393482241287297e-05, + "loss": 0.0974, + "step": 10204, + "task_loss": 0.21033701300621033 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05313311517238617, + "epoch": 9.69, + "learning_rate": 3.392487009088041e-05, + "loss": 0.0544, + "step": 10205, + "task_loss": 0.06595422327518463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03181048855185509, + "epoch": 9.69, + "learning_rate": 3.391491614764222e-05, + "loss": 0.0369, + "step": 10206, + "task_loss": 0.08310259878635406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010201860219240189, + "epoch": 9.69, + "learning_rate": 3.390496058496657e-05, + "loss": 0.018, + "step": 10207, + "task_loss": 0.0884314775466919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02600035071372986, + "epoch": 9.69, + "learning_rate": 3.3895003404661955e-05, + "loss": 0.0239, + "step": 10208, + "task_loss": 0.005447834730148315 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04286601021885872, + "epoch": 9.7, + "learning_rate": 3.3885044608537125e-05, + "loss": 0.0517, + "step": 10209, + "task_loss": 0.13154464960098267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0452398955821991, + "epoch": 9.7, + "learning_rate": 3.387508419840115e-05, + "loss": 0.0499, + "step": 10210, + "task_loss": 0.0916098803281784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04269058629870415, + "epoch": 9.7, + "learning_rate": 3.386512217606339e-05, + "loss": 0.0601, + "step": 10211, + "task_loss": 0.2167372852563858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06171311438083649, + "epoch": 9.7, + "learning_rate": 3.385515854333349e-05, + "loss": 0.0662, + "step": 10212, + "task_loss": 0.10651151090860367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1391063630580902, + "epoch": 9.7, + "learning_rate": 3.38451933020214e-05, + "loss": 0.135, + "step": 10213, + "task_loss": 0.09796366840600967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05202070623636246, + "epoch": 9.7, + "learning_rate": 3.383522645393734e-05, + "loss": 0.0639, + "step": 10214, + "task_loss": 0.17111501097679138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04103817790746689, + "epoch": 9.7, + "learning_rate": 3.3825258000891846e-05, + "loss": 0.0503, + "step": 10215, + "task_loss": 0.13386118412017822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026339948177337646, + "epoch": 9.7, + "learning_rate": 3.381528794469574e-05, + "loss": 0.0335, + "step": 10216, + "task_loss": 0.09778992086648941 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009569020941853523, + "epoch": 9.7, + "learning_rate": 3.380531628716012e-05, + "loss": 0.0091, + "step": 10217, + "task_loss": 0.004733014851808548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05844016373157501, + "epoch": 9.7, + "learning_rate": 3.3795343030096384e-05, + "loss": 0.0596, + "step": 10218, + "task_loss": 0.06965689361095428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.061011701822280884, + "epoch": 9.7, + "learning_rate": 3.3785368175316226e-05, + "loss": 0.0728, + "step": 10219, + "task_loss": 0.1784566342830658 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01999977044761181, + "epoch": 9.71, + "learning_rate": 3.377539172463164e-05, + "loss": 0.0248, + "step": 10220, + "task_loss": 0.06758248805999756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019030677154660225, + "epoch": 9.71, + "learning_rate": 3.376541367985488e-05, + "loss": 0.0196, + "step": 10221, + "task_loss": 0.02473231591284275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03495609387755394, + "epoch": 9.71, + "learning_rate": 3.3755434042798506e-05, + "loss": 0.033, + "step": 10222, + "task_loss": 0.01556423120200634 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1749984323978424, + "epoch": 9.71, + "learning_rate": 3.374545281527538e-05, + "loss": 0.1663, + "step": 10223, + "task_loss": 0.08834327012300491 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012353798374533653, + "epoch": 9.71, + "learning_rate": 3.3735469999098615e-05, + "loss": 0.0178, + "step": 10224, + "task_loss": 0.06693007051944733 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08601576089859009, + "epoch": 9.71, + "learning_rate": 3.372548559608166e-05, + "loss": 0.0848, + "step": 10225, + "task_loss": 0.07415040582418442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03538770601153374, + "epoch": 9.71, + "learning_rate": 3.3715499608038234e-05, + "loss": 0.0404, + "step": 10226, + "task_loss": 0.08567916601896286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025410983711481094, + "epoch": 9.71, + "learning_rate": 3.370551203678233e-05, + "loss": 0.0321, + "step": 10227, + "task_loss": 0.09199456870555878 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07486201822757721, + "epoch": 9.71, + "learning_rate": 3.369552288412822e-05, + "loss": 0.083, + "step": 10228, + "task_loss": 0.15588508546352386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05014657974243164, + "epoch": 9.71, + "learning_rate": 3.368553215189052e-05, + "loss": 0.0607, + "step": 10229, + "task_loss": 0.155786395072937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044199906289577484, + "epoch": 9.72, + "learning_rate": 3.367553984188407e-05, + "loss": 0.0514, + "step": 10230, + "task_loss": 0.11591973900794983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02194777876138687, + "epoch": 9.72, + "learning_rate": 3.366554595592402e-05, + "loss": 0.0213, + "step": 10231, + "task_loss": 0.01566997356712818 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06304118037223816, + "epoch": 9.72, + "learning_rate": 3.365555049582582e-05, + "loss": 0.0571, + "step": 10232, + "task_loss": 0.003476545214653015 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05096285790205002, + "epoch": 9.72, + "learning_rate": 3.364555346340518e-05, + "loss": 0.0523, + "step": 10233, + "task_loss": 0.06410223990678787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.061933599412441254, + "epoch": 9.72, + "learning_rate": 3.3635554860478126e-05, + "loss": 0.0585, + "step": 10234, + "task_loss": 0.02772807516157627 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.16535219550132751, + "epoch": 9.72, + "learning_rate": 3.362555468886093e-05, + "loss": 0.1643, + "step": 10235, + "task_loss": 0.1545773446559906 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039442308247089386, + "epoch": 9.72, + "learning_rate": 3.361555295037019e-05, + "loss": 0.047, + "step": 10236, + "task_loss": 0.11461587995290756 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019081950187683105, + "epoch": 9.72, + "learning_rate": 3.360554964682276e-05, + "loss": 0.0304, + "step": 10237, + "task_loss": 0.13198192417621613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02817378006875515, + "epoch": 9.72, + "learning_rate": 3.359554478003579e-05, + "loss": 0.038, + "step": 10238, + "task_loss": 0.1261017769575119 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024465490132570267, + "epoch": 9.72, + "learning_rate": 3.358553835182673e-05, + "loss": 0.0231, + "step": 10239, + "task_loss": 0.01032637245953083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026946038007736206, + "epoch": 9.72, + "learning_rate": 3.357553036401326e-05, + "loss": 0.0259, + "step": 10240, + "task_loss": 0.016648059710860252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10884161293506622, + "epoch": 9.73, + "learning_rate": 3.356552081841341e-05, + "loss": 0.1042, + "step": 10241, + "task_loss": 0.062114790081977844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044160258024930954, + "epoch": 9.73, + "learning_rate": 3.355550971684545e-05, + "loss": 0.0446, + "step": 10242, + "task_loss": 0.048725277185440063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016672126948833466, + "epoch": 9.73, + "learning_rate": 3.3545497061127946e-05, + "loss": 0.0278, + "step": 10243, + "task_loss": 0.12836672365665436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025100041180849075, + "epoch": 9.73, + "learning_rate": 3.353548285307975e-05, + "loss": 0.0276, + "step": 10244, + "task_loss": 0.050310954451560974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07603813707828522, + "epoch": 9.73, + "learning_rate": 3.352546709451998e-05, + "loss": 0.0717, + "step": 10245, + "task_loss": 0.03274556249380112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06987538933753967, + "epoch": 9.73, + "learning_rate": 3.351544978726805e-05, + "loss": 0.0762, + "step": 10246, + "task_loss": 0.1328875869512558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01046474277973175, + "epoch": 9.73, + "learning_rate": 3.350543093314366e-05, + "loss": 0.0115, + "step": 10247, + "task_loss": 0.020672082901000977 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018444234505295753, + "epoch": 9.73, + "learning_rate": 3.349541053396678e-05, + "loss": 0.0198, + "step": 10248, + "task_loss": 0.03211362287402153 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13752481341362, + "epoch": 9.73, + "learning_rate": 3.348538859155766e-05, + "loss": 0.1363, + "step": 10249, + "task_loss": 0.12482313811779022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024666164070367813, + "epoch": 9.73, + "learning_rate": 3.347536510773684e-05, + "loss": 0.031, + "step": 10250, + "task_loss": 0.08787593245506287 + }, + { + "epoch": 9.73, + "eval_accuracy": 0.8910550458715596, + "eval_loss": 0.4798487424850464, + "eval_runtime": 18.1145, + "eval_samples_per_second": 48.138, + "eval_steps_per_second": 6.017, + "step": 10250 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026560328900814056, + "epoch": 9.74, + "learning_rate": 3.346534008432513e-05, + "loss": 0.0258, + "step": 10251, + "task_loss": 0.018506888300180435 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017583798617124557, + "epoch": 9.74, + "learning_rate": 3.3455313523143615e-05, + "loss": 0.0163, + "step": 10252, + "task_loss": 0.004295343533158302 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06413343548774719, + "epoch": 9.74, + "learning_rate": 3.3445285426013685e-05, + "loss": 0.0662, + "step": 10253, + "task_loss": 0.08491207659244537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07041002064943314, + "epoch": 9.74, + "learning_rate": 3.343525579475698e-05, + "loss": 0.0762, + "step": 10254, + "task_loss": 0.12783107161521912 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021717432886362076, + "epoch": 9.74, + "learning_rate": 3.342522463119543e-05, + "loss": 0.0355, + "step": 10255, + "task_loss": 0.15983924269676208 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016216669231653214, + "epoch": 9.74, + "learning_rate": 3.341519193715127e-05, + "loss": 0.0152, + "step": 10256, + "task_loss": 0.00632026232779026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03318667411804199, + "epoch": 9.74, + "learning_rate": 3.340515771444695e-05, + "loss": 0.0442, + "step": 10257, + "task_loss": 0.14321547746658325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0912448838353157, + "epoch": 9.74, + "learning_rate": 3.3395121964905265e-05, + "loss": 0.0919, + "step": 10258, + "task_loss": 0.09797343611717224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09608903527259827, + "epoch": 9.74, + "learning_rate": 3.338508469034922e-05, + "loss": 0.1046, + "step": 10259, + "task_loss": 0.18118667602539062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04384881630539894, + "epoch": 9.74, + "learning_rate": 3.3375045892602176e-05, + "loss": 0.0422, + "step": 10260, + "task_loss": 0.02693062275648117 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023890579119324684, + "epoch": 9.74, + "learning_rate": 3.3365005573487706e-05, + "loss": 0.0328, + "step": 10261, + "task_loss": 0.11346684396266937 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027102213352918625, + "epoch": 9.75, + "learning_rate": 3.335496373482969e-05, + "loss": 0.0256, + "step": 10262, + "task_loss": 0.012507695704698563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07778482139110565, + "epoch": 9.75, + "learning_rate": 3.334492037845227e-05, + "loss": 0.0737, + "step": 10263, + "task_loss": 0.037038303911685944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0290234312415123, + "epoch": 9.75, + "learning_rate": 3.333487550617987e-05, + "loss": 0.0362, + "step": 10264, + "task_loss": 0.10083475708961487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013366115279495716, + "epoch": 9.75, + "learning_rate": 3.332482911983721e-05, + "loss": 0.0137, + "step": 10265, + "task_loss": 0.01663101837038994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08686914294958115, + "epoch": 9.75, + "learning_rate": 3.331478122124924e-05, + "loss": 0.0917, + "step": 10266, + "task_loss": 0.13518640398979187 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01988716423511505, + "epoch": 9.75, + "learning_rate": 3.330473181224121e-05, + "loss": 0.0226, + "step": 10267, + "task_loss": 0.04693231359124184 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03620753064751625, + "epoch": 9.75, + "learning_rate": 3.3294680894638655e-05, + "loss": 0.0332, + "step": 10268, + "task_loss": 0.0058042556047439575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08979254215955734, + "epoch": 9.75, + "learning_rate": 3.328462847026736e-05, + "loss": 0.0962, + "step": 10269, + "task_loss": 0.15426769852638245 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030872471630573273, + "epoch": 9.75, + "learning_rate": 3.327457454095342e-05, + "loss": 0.0325, + "step": 10270, + "task_loss": 0.04736156761646271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010424168780446053, + "epoch": 9.75, + "learning_rate": 3.3264519108523154e-05, + "loss": 0.0103, + "step": 10271, + "task_loss": 0.009468691423535347 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03220530226826668, + "epoch": 9.75, + "learning_rate": 3.3254462174803186e-05, + "loss": 0.0311, + "step": 10272, + "task_loss": 0.021126460283994675 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03830189257860184, + "epoch": 9.76, + "learning_rate": 3.324440374162041e-05, + "loss": 0.0357, + "step": 10273, + "task_loss": 0.012562312185764313 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03549380600452423, + "epoch": 9.76, + "learning_rate": 3.323434381080199e-05, + "loss": 0.0339, + "step": 10274, + "task_loss": 0.019155368208885193 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0406593419611454, + "epoch": 9.76, + "learning_rate": 3.322428238417537e-05, + "loss": 0.0548, + "step": 10275, + "task_loss": 0.18189404904842377 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08273656666278839, + "epoch": 9.76, + "learning_rate": 3.321421946356823e-05, + "loss": 0.0821, + "step": 10276, + "task_loss": 0.07649406045675278 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047196559607982635, + "epoch": 9.76, + "learning_rate": 3.320415505080858e-05, + "loss": 0.0473, + "step": 10277, + "task_loss": 0.048162683844566345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0971732959151268, + "epoch": 9.76, + "learning_rate": 3.3194089147724644e-05, + "loss": 0.1024, + "step": 10278, + "task_loss": 0.1492057889699936 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008488696068525314, + "epoch": 9.76, + "learning_rate": 3.3184021756144954e-05, + "loss": 0.0085, + "step": 10279, + "task_loss": 0.008146345615386963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03513052687048912, + "epoch": 9.76, + "learning_rate": 3.317395287789829e-05, + "loss": 0.0421, + "step": 10280, + "task_loss": 0.1049308255314827 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.060586679726839066, + "epoch": 9.76, + "learning_rate": 3.316388251481373e-05, + "loss": 0.0639, + "step": 10281, + "task_loss": 0.09365570545196533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02429119497537613, + "epoch": 9.76, + "learning_rate": 3.3153810668720594e-05, + "loss": 0.0301, + "step": 10282, + "task_loss": 0.0827622190117836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02623087167739868, + "epoch": 9.77, + "learning_rate": 3.3143737341448475e-05, + "loss": 0.0325, + "step": 10283, + "task_loss": 0.0885535329580307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09432581067085266, + "epoch": 9.77, + "learning_rate": 3.3133662534827255e-05, + "loss": 0.0897, + "step": 10284, + "task_loss": 0.04778246581554413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.040426380932331085, + "epoch": 9.77, + "learning_rate": 3.3123586250687055e-05, + "loss": 0.0396, + "step": 10285, + "task_loss": 0.031931400299072266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047953128814697266, + "epoch": 9.77, + "learning_rate": 3.311350849085829e-05, + "loss": 0.0458, + "step": 10286, + "task_loss": 0.026047490537166595 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041989296674728394, + "epoch": 9.77, + "learning_rate": 3.3103429257171635e-05, + "loss": 0.0387, + "step": 10287, + "task_loss": 0.009208640083670616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023370787501335144, + "epoch": 9.77, + "learning_rate": 3.309334855145803e-05, + "loss": 0.031, + "step": 10288, + "task_loss": 0.09938006848096848 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07616433501243591, + "epoch": 9.77, + "learning_rate": 3.3083266375548675e-05, + "loss": 0.0742, + "step": 10289, + "task_loss": 0.05699377879500389 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13219673931598663, + "epoch": 9.77, + "learning_rate": 3.3073182731275064e-05, + "loss": 0.1273, + "step": 10290, + "task_loss": 0.08289719372987747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.043332893401384354, + "epoch": 9.77, + "learning_rate": 3.306309762046892e-05, + "loss": 0.0515, + "step": 10291, + "task_loss": 0.1250426173210144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10248932242393494, + "epoch": 9.77, + "learning_rate": 3.305301104496227e-05, + "loss": 0.0988, + "step": 10292, + "task_loss": 0.06527666747570038 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01327238418161869, + "epoch": 9.77, + "learning_rate": 3.3042923006587366e-05, + "loss": 0.0264, + "step": 10293, + "task_loss": 0.145028218626976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015359059907495975, + "epoch": 9.78, + "learning_rate": 3.303283350717678e-05, + "loss": 0.0216, + "step": 10294, + "task_loss": 0.07747124135494232 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019833415746688843, + "epoch": 9.78, + "learning_rate": 3.302274254856329e-05, + "loss": 0.0288, + "step": 10295, + "task_loss": 0.10919887572526932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015060881152749062, + "epoch": 9.78, + "learning_rate": 3.301265013257998e-05, + "loss": 0.0139, + "step": 10296, + "task_loss": 0.0035025514662265778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13087275624275208, + "epoch": 9.78, + "learning_rate": 3.300255626106019e-05, + "loss": 0.1465, + "step": 10297, + "task_loss": 0.2874350845813751 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02529519982635975, + "epoch": 9.78, + "learning_rate": 3.2992460935837505e-05, + "loss": 0.0235, + "step": 10298, + "task_loss": 0.0074701253324747086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011823216453194618, + "epoch": 9.78, + "learning_rate": 3.2982364158745805e-05, + "loss": 0.011, + "step": 10299, + "task_loss": 0.003194596618413925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05281532555818558, + "epoch": 9.78, + "learning_rate": 3.297226593161921e-05, + "loss": 0.0509, + "step": 10300, + "task_loss": 0.033622805029153824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013377421535551548, + "epoch": 9.78, + "learning_rate": 3.2962166256292113e-05, + "loss": 0.0233, + "step": 10301, + "task_loss": 0.11291710287332535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011167014017701149, + "epoch": 9.78, + "learning_rate": 3.295206513459917e-05, + "loss": 0.0191, + "step": 10302, + "task_loss": 0.09001649916172028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019459430128335953, + "epoch": 9.78, + "learning_rate": 3.2941962568375296e-05, + "loss": 0.018, + "step": 10303, + "task_loss": 0.005041791126132011 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014631148427724838, + "epoch": 9.79, + "learning_rate": 3.2931858559455674e-05, + "loss": 0.0183, + "step": 10304, + "task_loss": 0.05133789777755737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08178240060806274, + "epoch": 9.79, + "learning_rate": 3.292175310967575e-05, + "loss": 0.085, + "step": 10305, + "task_loss": 0.11350104212760925 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029704544693231583, + "epoch": 9.79, + "learning_rate": 3.291164622087122e-05, + "loss": 0.039, + "step": 10306, + "task_loss": 0.12229295074939728 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007734452374279499, + "epoch": 9.79, + "learning_rate": 3.290153789487804e-05, + "loss": 0.0074, + "step": 10307, + "task_loss": 0.004266131669282913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01227061077952385, + "epoch": 9.79, + "learning_rate": 3.289142813353246e-05, + "loss": 0.0113, + "step": 10308, + "task_loss": 0.0023228712379932404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016713295131921768, + "epoch": 9.79, + "learning_rate": 3.2881316938670945e-05, + "loss": 0.0228, + "step": 10309, + "task_loss": 0.07756958901882172 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031723689287900925, + "epoch": 9.79, + "learning_rate": 3.2871204312130254e-05, + "loss": 0.032, + "step": 10310, + "task_loss": 0.03421415388584137 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015944186598062515, + "epoch": 9.79, + "learning_rate": 3.28610902557474e-05, + "loss": 0.0221, + "step": 10311, + "task_loss": 0.07747285813093185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04275937378406525, + "epoch": 9.79, + "learning_rate": 3.285097477135963e-05, + "loss": 0.0503, + "step": 10312, + "task_loss": 0.11856701970100403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023751258850097656, + "epoch": 9.79, + "learning_rate": 3.284085786080449e-05, + "loss": 0.023, + "step": 10313, + "task_loss": 0.01643642783164978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019033825024962425, + "epoch": 9.79, + "learning_rate": 3.283073952591975e-05, + "loss": 0.0185, + "step": 10314, + "task_loss": 0.013599434867501259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.13899603486061096, + "epoch": 9.8, + "learning_rate": 3.2820619768543473e-05, + "loss": 0.1415, + "step": 10315, + "task_loss": 0.1637457311153412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09794485569000244, + "epoch": 9.8, + "learning_rate": 3.281049859051394e-05, + "loss": 0.0938, + "step": 10316, + "task_loss": 0.0560276135802269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018764419481158257, + "epoch": 9.8, + "learning_rate": 3.280037599366972e-05, + "loss": 0.0268, + "step": 10317, + "task_loss": 0.09955213218927383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11311816424131393, + "epoch": 9.8, + "learning_rate": 3.2790251979849654e-05, + "loss": 0.1127, + "step": 10318, + "task_loss": 0.10870064795017242 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10437528789043427, + "epoch": 9.8, + "learning_rate": 3.278012655089277e-05, + "loss": 0.126, + "step": 10319, + "task_loss": 0.3202948570251465 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009548291563987732, + "epoch": 9.8, + "learning_rate": 3.276999970863845e-05, + "loss": 0.0129, + "step": 10320, + "task_loss": 0.04352171719074249 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01687040366232395, + "epoch": 9.8, + "learning_rate": 3.275987145492625e-05, + "loss": 0.0159, + "step": 10321, + "task_loss": 0.007143234834074974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047225743532180786, + "epoch": 9.8, + "learning_rate": 3.274974179159603e-05, + "loss": 0.0458, + "step": 10322, + "task_loss": 0.0325019434094429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03787485137581825, + "epoch": 9.8, + "learning_rate": 3.27396107204879e-05, + "loss": 0.0505, + "step": 10323, + "task_loss": 0.16443775594234467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024566274136304855, + "epoch": 9.8, + "learning_rate": 3.2729478243442194e-05, + "loss": 0.0231, + "step": 10324, + "task_loss": 0.009893251582980156 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018858332186937332, + "epoch": 9.81, + "learning_rate": 3.271934436229955e-05, + "loss": 0.0259, + "step": 10325, + "task_loss": 0.0887862890958786 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012717785313725471, + "epoch": 9.81, + "learning_rate": 3.270920907890082e-05, + "loss": 0.0124, + "step": 10326, + "task_loss": 0.00914292223751545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008497057482600212, + "epoch": 9.81, + "learning_rate": 3.269907239508714e-05, + "loss": 0.0103, + "step": 10327, + "task_loss": 0.026958035305142403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013917407020926476, + "epoch": 9.81, + "learning_rate": 3.268893431269987e-05, + "loss": 0.0173, + "step": 10328, + "task_loss": 0.047798462212085724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022593341767787933, + "epoch": 9.81, + "learning_rate": 3.2678794833580654e-05, + "loss": 0.0267, + "step": 10329, + "task_loss": 0.0638725757598877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020902365446090698, + "epoch": 9.81, + "learning_rate": 3.2668653959571384e-05, + "loss": 0.0272, + "step": 10330, + "task_loss": 0.08406180143356323 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021388908848166466, + "epoch": 9.81, + "learning_rate": 3.2658511692514184e-05, + "loss": 0.0294, + "step": 10331, + "task_loss": 0.10142026096582413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03885917365550995, + "epoch": 9.81, + "learning_rate": 3.2648368034251454e-05, + "loss": 0.0584, + "step": 10332, + "task_loss": 0.23429960012435913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06736599653959274, + "epoch": 9.81, + "learning_rate": 3.263822298662583e-05, + "loss": 0.0732, + "step": 10333, + "task_loss": 0.12564432621002197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01693696156144142, + "epoch": 9.81, + "learning_rate": 3.2628076551480216e-05, + "loss": 0.0215, + "step": 10334, + "task_loss": 0.06282781064510345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03203189745545387, + "epoch": 9.81, + "learning_rate": 3.2617928730657764e-05, + "loss": 0.0306, + "step": 10335, + "task_loss": 0.018122676759958267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01440800167620182, + "epoch": 9.82, + "learning_rate": 3.260777952600186e-05, + "loss": 0.0135, + "step": 10336, + "task_loss": 0.004851058125495911 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04675164446234703, + "epoch": 9.82, + "learning_rate": 3.2597628939356175e-05, + "loss": 0.0745, + "step": 10337, + "task_loss": 0.3238287568092346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012276804074645042, + "epoch": 9.82, + "learning_rate": 3.25874769725646e-05, + "loss": 0.0225, + "step": 10338, + "task_loss": 0.114773690700531 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013100717216730118, + "epoch": 9.82, + "learning_rate": 3.257732362747129e-05, + "loss": 0.0128, + "step": 10339, + "task_loss": 0.009668847545981407 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07656170427799225, + "epoch": 9.82, + "learning_rate": 3.256716890592065e-05, + "loss": 0.0814, + "step": 10340, + "task_loss": 0.1245703175663948 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09409487247467041, + "epoch": 9.82, + "learning_rate": 3.255701280975733e-05, + "loss": 0.0905, + "step": 10341, + "task_loss": 0.05802832543849945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00820472463965416, + "epoch": 9.82, + "learning_rate": 3.2546855340826246e-05, + "loss": 0.0078, + "step": 10342, + "task_loss": 0.004141604527831078 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0564790740609169, + "epoch": 9.82, + "learning_rate": 3.253669650097254e-05, + "loss": 0.0648, + "step": 10343, + "task_loss": 0.13937750458717346 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.033709198236465454, + "epoch": 9.82, + "learning_rate": 3.2526536292041625e-05, + "loss": 0.0378, + "step": 10344, + "task_loss": 0.07450772076845169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.061526041477918625, + "epoch": 9.82, + "learning_rate": 3.2516374715879126e-05, + "loss": 0.0774, + "step": 10345, + "task_loss": 0.22075936198234558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029069392010569572, + "epoch": 9.83, + "learning_rate": 3.250621177433097e-05, + "loss": 0.0391, + "step": 10346, + "task_loss": 0.12960076332092285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03016573004424572, + "epoch": 9.83, + "learning_rate": 3.249604746924331e-05, + "loss": 0.0284, + "step": 10347, + "task_loss": 0.012552225962281227 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1340094357728958, + "epoch": 9.83, + "learning_rate": 3.248588180246251e-05, + "loss": 0.1435, + "step": 10348, + "task_loss": 0.22935834527015686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.046159401535987854, + "epoch": 9.83, + "learning_rate": 3.247571477583523e-05, + "loss": 0.0571, + "step": 10349, + "task_loss": 0.1553247720003128 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01781924068927765, + "epoch": 9.83, + "learning_rate": 3.2465546391208355e-05, + "loss": 0.0167, + "step": 10350, + "task_loss": 0.007019467651844025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01618257164955139, + "epoch": 9.83, + "learning_rate": 3.245537665042903e-05, + "loss": 0.0214, + "step": 10351, + "task_loss": 0.0686056837439537 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03626525402069092, + "epoch": 9.83, + "learning_rate": 3.244520555534463e-05, + "loss": 0.034, + "step": 10352, + "task_loss": 0.014076223596930504 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10587593913078308, + "epoch": 9.83, + "learning_rate": 3.243503310780278e-05, + "loss": 0.102, + "step": 10353, + "task_loss": 0.06675413995981216 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014715575613081455, + "epoch": 9.83, + "learning_rate": 3.242485930965136e-05, + "loss": 0.0137, + "step": 10354, + "task_loss": 0.004238050431013107 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0231354720890522, + "epoch": 9.83, + "learning_rate": 3.241468416273849e-05, + "loss": 0.0226, + "step": 10355, + "task_loss": 0.01763581857085228 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09283407032489777, + "epoch": 9.83, + "learning_rate": 3.2404507668912534e-05, + "loss": 0.0874, + "step": 10356, + "task_loss": 0.03887883201241493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04399856925010681, + "epoch": 9.84, + "learning_rate": 3.2394329830022095e-05, + "loss": 0.0429, + "step": 10357, + "task_loss": 0.0327279269695282 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.042265597730875015, + "epoch": 9.84, + "learning_rate": 3.238415064791603e-05, + "loss": 0.0397, + "step": 10358, + "task_loss": 0.016846617683768272 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024321310222148895, + "epoch": 9.84, + "learning_rate": 3.237397012444344e-05, + "loss": 0.0254, + "step": 10359, + "task_loss": 0.03516604006290436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03976666182279587, + "epoch": 9.84, + "learning_rate": 3.2363788261453664e-05, + "loss": 0.0445, + "step": 10360, + "task_loss": 0.08759070187807083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07277016341686249, + "epoch": 9.84, + "learning_rate": 3.2353605060796286e-05, + "loss": 0.067, + "step": 10361, + "task_loss": 0.015487806871533394 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013543391600251198, + "epoch": 9.84, + "learning_rate": 3.2343420524321134e-05, + "loss": 0.0139, + "step": 10362, + "task_loss": 0.01754959300160408 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06739618629217148, + "epoch": 9.84, + "learning_rate": 3.2333234653878275e-05, + "loss": 0.065, + "step": 10363, + "task_loss": 0.04387857764959335 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06469708681106567, + "epoch": 9.84, + "learning_rate": 3.2323047451318023e-05, + "loss": 0.0679, + "step": 10364, + "task_loss": 0.09665581583976746 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025472253561019897, + "epoch": 9.84, + "learning_rate": 3.2312858918490936e-05, + "loss": 0.0235, + "step": 10365, + "task_loss": 0.005352867767214775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08233923465013504, + "epoch": 9.84, + "learning_rate": 3.2302669057247806e-05, + "loss": 0.0887, + "step": 10366, + "task_loss": 0.14634165167808533 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09745900332927704, + "epoch": 9.85, + "learning_rate": 3.2292477869439666e-05, + "loss": 0.1002, + "step": 10367, + "task_loss": 0.1244800016283989 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0947013646364212, + "epoch": 9.85, + "learning_rate": 3.228228535691781e-05, + "loss": 0.0893, + "step": 10368, + "task_loss": 0.04106832295656204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019390946254134178, + "epoch": 9.85, + "learning_rate": 3.2272091521533745e-05, + "loss": 0.0182, + "step": 10369, + "task_loss": 0.007950548082590103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020949633792042732, + "epoch": 9.85, + "learning_rate": 3.226189636513923e-05, + "loss": 0.0202, + "step": 10370, + "task_loss": 0.013539431616663933 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015328247100114822, + "epoch": 9.85, + "learning_rate": 3.225169988958627e-05, + "loss": 0.0147, + "step": 10371, + "task_loss": 0.009431937709450722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029308216646313667, + "epoch": 9.85, + "learning_rate": 3.2241502096727095e-05, + "loss": 0.0282, + "step": 10372, + "task_loss": 0.018634788691997528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.056150779128074646, + "epoch": 9.85, + "learning_rate": 3.2231302988414194e-05, + "loss": 0.0586, + "step": 10373, + "task_loss": 0.08058308064937592 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03458091989159584, + "epoch": 9.85, + "learning_rate": 3.222110256650028e-05, + "loss": 0.0484, + "step": 10374, + "task_loss": 0.17240062355995178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06620018929243088, + "epoch": 9.85, + "learning_rate": 3.2210900832838295e-05, + "loss": 0.0788, + "step": 10375, + "task_loss": 0.1921333372592926 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04633745551109314, + "epoch": 9.85, + "learning_rate": 3.220069778928146e-05, + "loss": 0.0467, + "step": 10376, + "task_loss": 0.04954282194375992 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027524154633283615, + "epoch": 9.85, + "learning_rate": 3.2190493437683185e-05, + "loss": 0.0322, + "step": 10377, + "task_loss": 0.07475399971008301 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.062353409826755524, + "epoch": 9.86, + "learning_rate": 3.2180287779897155e-05, + "loss": 0.0616, + "step": 10378, + "task_loss": 0.05466865003108978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0365450382232666, + "epoch": 9.86, + "learning_rate": 3.217008081777726e-05, + "loss": 0.0391, + "step": 10379, + "task_loss": 0.06168461591005325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07208600640296936, + "epoch": 9.86, + "learning_rate": 3.2159872553177655e-05, + "loss": 0.0865, + "step": 10380, + "task_loss": 0.21665066480636597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013081874698400497, + "epoch": 9.86, + "learning_rate": 3.2149662987952725e-05, + "loss": 0.0183, + "step": 10381, + "task_loss": 0.0652560293674469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11617452651262283, + "epoch": 9.86, + "learning_rate": 3.213945212395707e-05, + "loss": 0.112, + "step": 10382, + "task_loss": 0.07452362775802612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02089625969529152, + "epoch": 9.86, + "learning_rate": 3.212923996304556e-05, + "loss": 0.0311, + "step": 10383, + "task_loss": 0.12253687530755997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0406123585999012, + "epoch": 9.86, + "learning_rate": 3.211902650707327e-05, + "loss": 0.0457, + "step": 10384, + "task_loss": 0.09141912311315536 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08201208710670471, + "epoch": 9.86, + "learning_rate": 3.210881175789553e-05, + "loss": 0.092, + "step": 10385, + "task_loss": 0.18144634366035461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02090279385447502, + "epoch": 9.86, + "learning_rate": 3.209859571736791e-05, + "loss": 0.0203, + "step": 10386, + "task_loss": 0.014566395431756973 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05922047048807144, + "epoch": 9.86, + "learning_rate": 3.208837838734618e-05, + "loss": 0.061, + "step": 10387, + "task_loss": 0.07677384465932846 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05778921768069267, + "epoch": 9.87, + "learning_rate": 3.207815976968638e-05, + "loss": 0.0572, + "step": 10388, + "task_loss": 0.052189189940690994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03705509752035141, + "epoch": 9.87, + "learning_rate": 3.2067939866244764e-05, + "loss": 0.0341, + "step": 10389, + "task_loss": 0.00781635195016861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022746965289115906, + "epoch": 9.87, + "learning_rate": 3.205771867887784e-05, + "loss": 0.0218, + "step": 10390, + "task_loss": 0.013284927234053612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04225531220436096, + "epoch": 9.87, + "learning_rate": 3.204749620944232e-05, + "loss": 0.0421, + "step": 10391, + "task_loss": 0.041095249354839325 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08849678933620453, + "epoch": 9.87, + "learning_rate": 3.203727245979518e-05, + "loss": 0.0892, + "step": 10392, + "task_loss": 0.09528757631778717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1014036163687706, + "epoch": 9.87, + "learning_rate": 3.20270474317936e-05, + "loss": 0.1055, + "step": 10393, + "task_loss": 0.14196741580963135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04590814188122749, + "epoch": 9.87, + "learning_rate": 3.201682112729502e-05, + "loss": 0.0474, + "step": 10394, + "task_loss": 0.06046411767601967 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03985198959708214, + "epoch": 9.87, + "learning_rate": 3.2006593548157074e-05, + "loss": 0.0387, + "step": 10395, + "task_loss": 0.028516611084342003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06808362156152725, + "epoch": 9.87, + "learning_rate": 3.1996364696237676e-05, + "loss": 0.0643, + "step": 10396, + "task_loss": 0.030522214248776436 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03122507780790329, + "epoch": 9.87, + "learning_rate": 3.198613457339493e-05, + "loss": 0.0428, + "step": 10397, + "task_loss": 0.14679580926895142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02945810928940773, + "epoch": 9.87, + "learning_rate": 3.19759031814872e-05, + "loss": 0.0363, + "step": 10398, + "task_loss": 0.09743687510490417 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02599484845995903, + "epoch": 9.88, + "learning_rate": 3.196567052237306e-05, + "loss": 0.0254, + "step": 10399, + "task_loss": 0.020352628082036972 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.22350770235061646, + "epoch": 9.88, + "learning_rate": 3.195543659791132e-05, + "loss": 0.2232, + "step": 10400, + "task_loss": 0.2200383096933365 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041732676327228546, + "epoch": 9.88, + "learning_rate": 3.194520140996102e-05, + "loss": 0.0469, + "step": 10401, + "task_loss": 0.09368692338466644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041806213557720184, + "epoch": 9.88, + "learning_rate": 3.193496496038144e-05, + "loss": 0.0622, + "step": 10402, + "task_loss": 0.24602389335632324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0394703671336174, + "epoch": 9.88, + "learning_rate": 3.1924727251032075e-05, + "loss": 0.0432, + "step": 10403, + "task_loss": 0.07703036814928055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034872762858867645, + "epoch": 9.88, + "learning_rate": 3.191448828377267e-05, + "loss": 0.0486, + "step": 10404, + "task_loss": 0.17233772575855255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10517394542694092, + "epoch": 9.88, + "learning_rate": 3.1904248060463146e-05, + "loss": 0.1068, + "step": 10405, + "task_loss": 0.12141425162553787 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010680675506591797, + "epoch": 9.88, + "learning_rate": 3.189400658296372e-05, + "loss": 0.0104, + "step": 10406, + "task_loss": 0.007806859910488129 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09319774061441422, + "epoch": 9.88, + "learning_rate": 3.188376385313479e-05, + "loss": 0.0928, + "step": 10407, + "task_loss": 0.08945365995168686 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05169279873371124, + "epoch": 9.88, + "learning_rate": 3.187351987283701e-05, + "loss": 0.0558, + "step": 10408, + "task_loss": 0.09317981451749802 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02045218087732792, + "epoch": 9.89, + "learning_rate": 3.1863274643931244e-05, + "loss": 0.0263, + "step": 10409, + "task_loss": 0.0785207599401474 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07327836751937866, + "epoch": 9.89, + "learning_rate": 3.185302816827858e-05, + "loss": 0.0837, + "step": 10410, + "task_loss": 0.17741957306861877 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012392308562994003, + "epoch": 9.89, + "learning_rate": 3.184278044774035e-05, + "loss": 0.0115, + "step": 10411, + "task_loss": 0.003290366381406784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.052759043872356415, + "epoch": 9.89, + "learning_rate": 3.183253148417808e-05, + "loss": 0.0562, + "step": 10412, + "task_loss": 0.08686379343271255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02482384815812111, + "epoch": 9.89, + "learning_rate": 3.182228127945358e-05, + "loss": 0.0357, + "step": 10413, + "task_loss": 0.13362696766853333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03491012752056122, + "epoch": 9.89, + "learning_rate": 3.1812029835428825e-05, + "loss": 0.0356, + "step": 10414, + "task_loss": 0.04205203801393509 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06706416606903076, + "epoch": 9.89, + "learning_rate": 3.1801777153966034e-05, + "loss": 0.071, + "step": 10415, + "task_loss": 0.1061612218618393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0414917916059494, + "epoch": 9.89, + "learning_rate": 3.179152323692767e-05, + "loss": 0.0383, + "step": 10416, + "task_loss": 0.009932199493050575 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022580554708838463, + "epoch": 9.89, + "learning_rate": 3.1781268086176406e-05, + "loss": 0.0401, + "step": 10417, + "task_loss": 0.1979600191116333 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07729755342006683, + "epoch": 9.89, + "learning_rate": 3.177101170357513e-05, + "loss": 0.0813, + "step": 10418, + "task_loss": 0.11744911223649979 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017039090394973755, + "epoch": 9.89, + "learning_rate": 3.1760754090986975e-05, + "loss": 0.0158, + "step": 10419, + "task_loss": 0.004911573603749275 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034923627972602844, + "epoch": 9.9, + "learning_rate": 3.175049525027527e-05, + "loss": 0.0393, + "step": 10420, + "task_loss": 0.07879183441400528 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015226260758936405, + "epoch": 9.9, + "learning_rate": 3.17402351833036e-05, + "loss": 0.0209, + "step": 10421, + "task_loss": 0.07186593115329742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04073633253574371, + "epoch": 9.9, + "learning_rate": 3.1729973891935745e-05, + "loss": 0.0493, + "step": 10422, + "task_loss": 0.12683020532131195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059145376086235046, + "epoch": 9.9, + "learning_rate": 3.1719711378035714e-05, + "loss": 0.0822, + "step": 10423, + "task_loss": 0.2894851565361023 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04453141614794731, + "epoch": 9.9, + "learning_rate": 3.1709447643467755e-05, + "loss": 0.0422, + "step": 10424, + "task_loss": 0.021708250045776367 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027283282950520515, + "epoch": 9.9, + "learning_rate": 3.1699182690096316e-05, + "loss": 0.036, + "step": 10425, + "task_loss": 0.11464565247297287 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.24350899457931519, + "epoch": 9.9, + "learning_rate": 3.168891651978609e-05, + "loss": 0.2368, + "step": 10426, + "task_loss": 0.17603522539138794 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07290820777416229, + "epoch": 9.9, + "learning_rate": 3.167864913440195e-05, + "loss": 0.0691, + "step": 10427, + "task_loss": 0.034495919942855835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05629532411694527, + "epoch": 9.9, + "learning_rate": 3.1668380535809036e-05, + "loss": 0.0606, + "step": 10428, + "task_loss": 0.09951600432395935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07032791525125504, + "epoch": 9.9, + "learning_rate": 3.165811072587267e-05, + "loss": 0.0833, + "step": 10429, + "task_loss": 0.20026040077209473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02635234221816063, + "epoch": 9.91, + "learning_rate": 3.164783970645845e-05, + "loss": 0.0334, + "step": 10430, + "task_loss": 0.09689018875360489 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014292672276496887, + "epoch": 9.91, + "learning_rate": 3.1637567479432113e-05, + "loss": 0.0284, + "step": 10431, + "task_loss": 0.15557925403118134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038801245391368866, + "epoch": 9.91, + "learning_rate": 3.1627294046659675e-05, + "loss": 0.042, + "step": 10432, + "task_loss": 0.07107855379581451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02201734110713005, + "epoch": 9.91, + "learning_rate": 3.1617019410007366e-05, + "loss": 0.0274, + "step": 10433, + "task_loss": 0.07534562796354294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01769842579960823, + "epoch": 9.91, + "learning_rate": 3.16067435713416e-05, + "loss": 0.0172, + "step": 10434, + "task_loss": 0.012379692867398262 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026350311934947968, + "epoch": 9.91, + "learning_rate": 3.159646653252906e-05, + "loss": 0.0298, + "step": 10435, + "task_loss": 0.06069865822792053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01953766867518425, + "epoch": 9.91, + "learning_rate": 3.1586188295436594e-05, + "loss": 0.0303, + "step": 10436, + "task_loss": 0.12754112482070923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03570760414004326, + "epoch": 9.91, + "learning_rate": 3.1575908861931314e-05, + "loss": 0.0438, + "step": 10437, + "task_loss": 0.11667999625205994 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018649637699127197, + "epoch": 9.91, + "learning_rate": 3.156562823388051e-05, + "loss": 0.0237, + "step": 10438, + "task_loss": 0.06956367939710617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029894903302192688, + "epoch": 9.91, + "learning_rate": 3.155534641315172e-05, + "loss": 0.0362, + "step": 10439, + "task_loss": 0.09323906153440475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020881684496998787, + "epoch": 9.91, + "learning_rate": 3.154506340161269e-05, + "loss": 0.0296, + "step": 10440, + "task_loss": 0.10763251781463623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06025876849889755, + "epoch": 9.92, + "learning_rate": 3.1534779201131366e-05, + "loss": 0.0771, + "step": 10441, + "task_loss": 0.22887033224105835 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059150584042072296, + "epoch": 9.92, + "learning_rate": 3.152449381357593e-05, + "loss": 0.0634, + "step": 10442, + "task_loss": 0.1019982397556305 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08557627350091934, + "epoch": 9.92, + "learning_rate": 3.151420724081478e-05, + "loss": 0.0847, + "step": 10443, + "task_loss": 0.07654394954442978 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05869875103235245, + "epoch": 9.92, + "learning_rate": 3.1503919484716495e-05, + "loss": 0.0728, + "step": 10444, + "task_loss": 0.19989144802093506 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01628267765045166, + "epoch": 9.92, + "learning_rate": 3.149363054714992e-05, + "loss": 0.0268, + "step": 10445, + "task_loss": 0.12150625884532928 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0214032344520092, + "epoch": 9.92, + "learning_rate": 3.148334042998408e-05, + "loss": 0.0206, + "step": 10446, + "task_loss": 0.013740543276071548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0316503532230854, + "epoch": 9.92, + "learning_rate": 3.1473049135088225e-05, + "loss": 0.0297, + "step": 10447, + "task_loss": 0.012053485959768295 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012814265675842762, + "epoch": 9.92, + "learning_rate": 3.146275666433183e-05, + "loss": 0.0122, + "step": 10448, + "task_loss": 0.007167477160692215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05052739381790161, + "epoch": 9.92, + "learning_rate": 3.145246301958455e-05, + "loss": 0.0509, + "step": 10449, + "task_loss": 0.054102275520563126 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.044567592442035675, + "epoch": 9.92, + "learning_rate": 3.14421682027163e-05, + "loss": 0.0422, + "step": 10450, + "task_loss": 0.021235253661870956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019051125273108482, + "epoch": 9.92, + "learning_rate": 3.143187221559715e-05, + "loss": 0.0181, + "step": 10451, + "task_loss": 0.009563138708472252 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0175587460398674, + "epoch": 9.93, + "learning_rate": 3.142157506009746e-05, + "loss": 0.0162, + "step": 10452, + "task_loss": 0.003730185329914093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025068845599889755, + "epoch": 9.93, + "learning_rate": 3.141127673808772e-05, + "loss": 0.0249, + "step": 10453, + "task_loss": 0.023736948147416115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012271126732230186, + "epoch": 9.93, + "learning_rate": 3.140097725143868e-05, + "loss": 0.0116, + "step": 10454, + "task_loss": 0.005191892385482788 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0380866639316082, + "epoch": 9.93, + "learning_rate": 3.13906766020213e-05, + "loss": 0.0387, + "step": 10455, + "task_loss": 0.04454638063907623 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0549575500190258, + "epoch": 9.93, + "learning_rate": 3.138037479170674e-05, + "loss": 0.0522, + "step": 10456, + "task_loss": 0.02707308530807495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06924262642860413, + "epoch": 9.93, + "learning_rate": 3.137007182236637e-05, + "loss": 0.0731, + "step": 10457, + "task_loss": 0.10814409703016281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02348874695599079, + "epoch": 9.93, + "learning_rate": 3.1359767695871767e-05, + "loss": 0.0272, + "step": 10458, + "task_loss": 0.06097613647580147 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04042445123195648, + "epoch": 9.93, + "learning_rate": 3.134946241409474e-05, + "loss": 0.0492, + "step": 10459, + "task_loss": 0.127986341714859 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018123149871826172, + "epoch": 9.93, + "learning_rate": 3.133915597890729e-05, + "loss": 0.02, + "step": 10460, + "task_loss": 0.036422938108444214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018057895824313164, + "epoch": 9.93, + "learning_rate": 3.132884839218162e-05, + "loss": 0.0172, + "step": 10461, + "task_loss": 0.009080575779080391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02193017117679119, + "epoch": 9.94, + "learning_rate": 3.131853965579016e-05, + "loss": 0.0215, + "step": 10462, + "task_loss": 0.017662420868873596 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041326068341732025, + "epoch": 9.94, + "learning_rate": 3.130822977160554e-05, + "loss": 0.0484, + "step": 10463, + "task_loss": 0.11219672113656998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015017848461866379, + "epoch": 9.94, + "learning_rate": 3.129791874150062e-05, + "loss": 0.0244, + "step": 10464, + "task_loss": 0.1089889407157898 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01874970830976963, + "epoch": 9.94, + "learning_rate": 3.1287606567348406e-05, + "loss": 0.0256, + "step": 10465, + "task_loss": 0.08684463053941727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020482422783970833, + "epoch": 9.94, + "learning_rate": 3.1277293251022185e-05, + "loss": 0.0274, + "step": 10466, + "task_loss": 0.08978745341300964 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02151617407798767, + "epoch": 9.94, + "learning_rate": 3.126697879439541e-05, + "loss": 0.0205, + "step": 10467, + "task_loss": 0.011844877153635025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01606358028948307, + "epoch": 9.94, + "learning_rate": 3.1256663199341764e-05, + "loss": 0.0152, + "step": 10468, + "task_loss": 0.00763893686234951 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030537568032741547, + "epoch": 9.94, + "learning_rate": 3.124634646773511e-05, + "loss": 0.0283, + "step": 10469, + "task_loss": 0.007955452427268028 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02014528028666973, + "epoch": 9.94, + "learning_rate": 3.1236028601449534e-05, + "loss": 0.0195, + "step": 10470, + "task_loss": 0.013426566496491432 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02128102444112301, + "epoch": 9.94, + "learning_rate": 3.1225709602359335e-05, + "loss": 0.0285, + "step": 10471, + "task_loss": 0.09380625188350677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016784438863396645, + "epoch": 9.94, + "learning_rate": 3.1215389472338995e-05, + "loss": 0.0157, + "step": 10472, + "task_loss": 0.0061706434935331345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04040418565273285, + "epoch": 9.95, + "learning_rate": 3.1205068213263234e-05, + "loss": 0.043, + "step": 10473, + "task_loss": 0.06682712584733963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012193303555250168, + "epoch": 9.95, + "learning_rate": 3.119474582700695e-05, + "loss": 0.0168, + "step": 10474, + "task_loss": 0.05839819461107254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06726071983575821, + "epoch": 9.95, + "learning_rate": 3.118442231544524e-05, + "loss": 0.071, + "step": 10475, + "task_loss": 0.10484815388917923 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026226460933685303, + "epoch": 9.95, + "learning_rate": 3.117409768045344e-05, + "loss": 0.0296, + "step": 10476, + "task_loss": 0.06030692532658577 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011876748874783516, + "epoch": 9.95, + "learning_rate": 3.116377192390706e-05, + "loss": 0.0113, + "step": 10477, + "task_loss": 0.006212221458554268 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08747504651546478, + "epoch": 9.95, + "learning_rate": 3.115344504768183e-05, + "loss": 0.0951, + "step": 10478, + "task_loss": 0.16354155540466309 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07612023502588272, + "epoch": 9.95, + "learning_rate": 3.1143117053653665e-05, + "loss": 0.0816, + "step": 10479, + "task_loss": 0.1306367814540863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02969006821513176, + "epoch": 9.95, + "learning_rate": 3.113278794369869e-05, + "loss": 0.0357, + "step": 10480, + "task_loss": 0.08966681361198425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08898676186800003, + "epoch": 9.95, + "learning_rate": 3.112245771969327e-05, + "loss": 0.0848, + "step": 10481, + "task_loss": 0.04718249663710594 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11488527804613113, + "epoch": 9.95, + "learning_rate": 3.1112126383513914e-05, + "loss": 0.1121, + "step": 10482, + "task_loss": 0.08729420602321625 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.048652954399585724, + "epoch": 9.96, + "learning_rate": 3.110179393703737e-05, + "loss": 0.0742, + "step": 10483, + "task_loss": 0.30367979407310486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019496137276291847, + "epoch": 9.96, + "learning_rate": 3.109146038214055e-05, + "loss": 0.0269, + "step": 10484, + "task_loss": 0.09309974312782288 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01703246682882309, + "epoch": 9.96, + "learning_rate": 3.108112572070063e-05, + "loss": 0.0197, + "step": 10485, + "task_loss": 0.043584369122982025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05471110716462135, + "epoch": 9.96, + "learning_rate": 3.1070789954594934e-05, + "loss": 0.0517, + "step": 10486, + "task_loss": 0.024789560586214066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02847129851579666, + "epoch": 9.96, + "learning_rate": 3.1060453085701e-05, + "loss": 0.0314, + "step": 10487, + "task_loss": 0.05726098641753197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018717454746365547, + "epoch": 9.96, + "learning_rate": 3.105011511589658e-05, + "loss": 0.0232, + "step": 10488, + "task_loss": 0.06356090307235718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030354956164956093, + "epoch": 9.96, + "learning_rate": 3.103977604705961e-05, + "loss": 0.0287, + "step": 10489, + "task_loss": 0.013773368671536446 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05581222474575043, + "epoch": 9.96, + "learning_rate": 3.102943588106824e-05, + "loss": 0.053, + "step": 10490, + "task_loss": 0.027538809925317764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021054701879620552, + "epoch": 9.96, + "learning_rate": 3.10190946198008e-05, + "loss": 0.0211, + "step": 10491, + "task_loss": 0.021210404112935066 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041227295994758606, + "epoch": 9.96, + "learning_rate": 3.100875226513583e-05, + "loss": 0.0386, + "step": 10492, + "task_loss": 0.015084227547049522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03447715565562248, + "epoch": 9.96, + "learning_rate": 3.099840881895208e-05, + "loss": 0.0406, + "step": 10493, + "task_loss": 0.09537314623594284 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05250038951635361, + "epoch": 9.97, + "learning_rate": 3.098806428312847e-05, + "loss": 0.0565, + "step": 10494, + "task_loss": 0.09219056367874146 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0351855605840683, + "epoch": 9.97, + "learning_rate": 3.097771865954415e-05, + "loss": 0.036, + "step": 10495, + "task_loss": 0.04331498593091965 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0283675417304039, + "epoch": 9.97, + "learning_rate": 3.096737195007845e-05, + "loss": 0.0341, + "step": 10496, + "task_loss": 0.08616603910923004 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029870422556996346, + "epoch": 9.97, + "learning_rate": 3.0957024156610884e-05, + "loss": 0.038, + "step": 10497, + "task_loss": 0.11162212491035461 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012384091503918171, + "epoch": 9.97, + "learning_rate": 3.09466752810212e-05, + "loss": 0.0158, + "step": 10498, + "task_loss": 0.04627315700054169 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013409021310508251, + "epoch": 9.97, + "learning_rate": 3.093632532518931e-05, + "loss": 0.0155, + "step": 10499, + "task_loss": 0.03382343798875809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010947839356958866, + "epoch": 9.97, + "learning_rate": 3.092597429099534e-05, + "loss": 0.0137, + "step": 10500, + "task_loss": 0.03889209032058716 + }, + { + "epoch": 9.97, + "eval_accuracy": 0.8956422018348624, + "eval_loss": 0.4390338361263275, + "eval_runtime": 18.2957, + "eval_samples_per_second": 47.661, + "eval_steps_per_second": 5.958, + "step": 10500 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02746719866991043, + "epoch": 9.97, + "learning_rate": 3.0915622180319585e-05, + "loss": 0.0316, + "step": 10501, + "task_loss": 0.06869719922542572 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039203185588121414, + "epoch": 9.97, + "learning_rate": 3.090526899504259e-05, + "loss": 0.0372, + "step": 10502, + "task_loss": 0.019136063754558563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025588029995560646, + "epoch": 9.97, + "learning_rate": 3.0894914737045034e-05, + "loss": 0.0305, + "step": 10503, + "task_loss": 0.07455385476350784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030900314450263977, + "epoch": 9.98, + "learning_rate": 3.088455940820782e-05, + "loss": 0.0378, + "step": 10504, + "task_loss": 0.09984858334064484 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059486109763383865, + "epoch": 9.98, + "learning_rate": 3.087420301041206e-05, + "loss": 0.0644, + "step": 10505, + "task_loss": 0.10867089033126831 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012473450042307377, + "epoch": 9.98, + "learning_rate": 3.086384554553902e-05, + "loss": 0.0157, + "step": 10506, + "task_loss": 0.044721417129039764 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037564076483249664, + "epoch": 9.98, + "learning_rate": 3.0853487015470206e-05, + "loss": 0.0342, + "step": 10507, + "task_loss": 0.00427047535777092 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.21402209997177124, + "epoch": 9.98, + "learning_rate": 3.084312742208728e-05, + "loss": 0.2225, + "step": 10508, + "task_loss": 0.2992434501647949 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05252481997013092, + "epoch": 9.98, + "learning_rate": 3.083276676727212e-05, + "loss": 0.0562, + "step": 10509, + "task_loss": 0.08937288820743561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02034352719783783, + "epoch": 9.98, + "learning_rate": 3.082240505290678e-05, + "loss": 0.0345, + "step": 10510, + "task_loss": 0.16238275170326233 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02308393456041813, + "epoch": 9.98, + "learning_rate": 3.081204228087353e-05, + "loss": 0.0238, + "step": 10511, + "task_loss": 0.02994043007493019 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022067924961447716, + "epoch": 9.98, + "learning_rate": 3.08016784530548e-05, + "loss": 0.0206, + "step": 10512, + "task_loss": 0.007659193128347397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017336195334792137, + "epoch": 9.98, + "learning_rate": 3.0791313571333244e-05, + "loss": 0.0188, + "step": 10513, + "task_loss": 0.031775638461112976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04067476838827133, + "epoch": 9.98, + "learning_rate": 3.078094763759168e-05, + "loss": 0.0461, + "step": 10514, + "task_loss": 0.09492932260036469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01405811682343483, + "epoch": 9.99, + "learning_rate": 3.0770580653713146e-05, + "loss": 0.0137, + "step": 10515, + "task_loss": 0.010203549638390541 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023571021854877472, + "epoch": 9.99, + "learning_rate": 3.076021262158084e-05, + "loss": 0.038, + "step": 10516, + "task_loss": 0.1676616072654724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019903654232621193, + "epoch": 9.99, + "learning_rate": 3.074984354307817e-05, + "loss": 0.0192, + "step": 10517, + "task_loss": 0.012664202600717545 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0405997633934021, + "epoch": 9.99, + "learning_rate": 3.073947342008873e-05, + "loss": 0.0409, + "step": 10518, + "task_loss": 0.04333402216434479 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022368015721440315, + "epoch": 9.99, + "learning_rate": 3.07291022544963e-05, + "loss": 0.0232, + "step": 10519, + "task_loss": 0.030709004029631615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013003375381231308, + "epoch": 9.99, + "learning_rate": 3.0718730048184855e-05, + "loss": 0.0123, + "step": 10520, + "task_loss": 0.0056927260011434555 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021634429693222046, + "epoch": 9.99, + "learning_rate": 3.0708356803038556e-05, + "loss": 0.0208, + "step": 10521, + "task_loss": 0.01319202035665512 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026351455599069595, + "epoch": 9.99, + "learning_rate": 3.069798252094175e-05, + "loss": 0.0246, + "step": 10522, + "task_loss": 0.008785083889961243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08953380584716797, + "epoch": 9.99, + "learning_rate": 3.068760720377897e-05, + "loss": 0.0882, + "step": 10523, + "task_loss": 0.07605984061956406 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01611064001917839, + "epoch": 9.99, + "learning_rate": 3.067723085343496e-05, + "loss": 0.0154, + "step": 10524, + "task_loss": 0.008628584444522858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03214561194181442, + "epoch": 10.0, + "learning_rate": 3.066685347179462e-05, + "loss": 0.0427, + "step": 10525, + "task_loss": 0.13809731602668762 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017522085458040237, + "epoch": 10.0, + "learning_rate": 3.065647506074306e-05, + "loss": 0.0165, + "step": 10526, + "task_loss": 0.007697628811001778 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03798111528158188, + "epoch": 10.0, + "learning_rate": 3.064609562216555e-05, + "loss": 0.0371, + "step": 10527, + "task_loss": 0.028712373226881027 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08762382715940475, + "epoch": 10.0, + "learning_rate": 3.063571515794759e-05, + "loss": 0.09, + "step": 10528, + "task_loss": 0.11169522255659103 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.060012731701135635, + "epoch": 10.0, + "learning_rate": 3.062533366997483e-05, + "loss": 0.0631, + "step": 10529, + "task_loss": 0.09081701934337616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06982729583978653, + "epoch": 10.0, + "learning_rate": 3.061495116013311e-05, + "loss": 0.0848, + "step": 10530, + "task_loss": 0.21950267255306244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015027199871838093, + "epoch": 10.0, + "learning_rate": 3.060456763030847e-05, + "loss": 0.0151, + "step": 10531, + "task_loss": 0.015308814123272896 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010798173025250435, + "epoch": 10.0, + "learning_rate": 3.059418308238713e-05, + "loss": 0.011, + "step": 10532, + "task_loss": 0.012500785291194916 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03574404492974281, + "epoch": 10.0, + "learning_rate": 3.0583797518255505e-05, + "loss": 0.0326, + "step": 10533, + "task_loss": 0.0042223744094371796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021271761506795883, + "epoch": 10.0, + "learning_rate": 3.057341093980015e-05, + "loss": 0.0316, + "step": 10534, + "task_loss": 0.12449042499065399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11009443551301956, + "epoch": 10.0, + "learning_rate": 3.056302334890786e-05, + "loss": 0.1068, + "step": 10535, + "task_loss": 0.07683957368135452 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018123319372534752, + "epoch": 10.01, + "learning_rate": 3.055263474746559e-05, + "loss": 0.0185, + "step": 10536, + "task_loss": 0.021713176742196083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016921810805797577, + "epoch": 10.01, + "learning_rate": 3.054224513736048e-05, + "loss": 0.0374, + "step": 10537, + "task_loss": 0.2221601903438568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017352506518363953, + "epoch": 10.01, + "learning_rate": 3.0531854520479844e-05, + "loss": 0.0216, + "step": 10538, + "task_loss": 0.05965179204940796 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04468035697937012, + "epoch": 10.01, + "learning_rate": 3.05214628987112e-05, + "loss": 0.0476, + "step": 10539, + "task_loss": 0.07353688031435013 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015710055828094482, + "epoch": 10.01, + "learning_rate": 3.0511070273942217e-05, + "loss": 0.0148, + "step": 10540, + "task_loss": 0.006397552788257599 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014855926856398582, + "epoch": 10.01, + "learning_rate": 3.0500676648060776e-05, + "loss": 0.0181, + "step": 10541, + "task_loss": 0.046922795474529266 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019756915047764778, + "epoch": 10.01, + "learning_rate": 3.049028202295494e-05, + "loss": 0.0209, + "step": 10542, + "task_loss": 0.03078971616923809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014726649969816208, + "epoch": 10.01, + "learning_rate": 3.0479886400512937e-05, + "loss": 0.0273, + "step": 10543, + "task_loss": 0.14058496057987213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030262373387813568, + "epoch": 10.01, + "learning_rate": 3.0469489782623163e-05, + "loss": 0.029, + "step": 10544, + "task_loss": 0.01749301515519619 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024717876687645912, + "epoch": 10.01, + "learning_rate": 3.045909217117424e-05, + "loss": 0.0291, + "step": 10545, + "task_loss": 0.06830822676420212 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010806870646774769, + "epoch": 10.02, + "learning_rate": 3.0448693568054924e-05, + "loss": 0.0103, + "step": 10546, + "task_loss": 0.005712360143661499 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022307565435767174, + "epoch": 10.02, + "learning_rate": 3.0438293975154186e-05, + "loss": 0.0247, + "step": 10547, + "task_loss": 0.04641730338335037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031023293733596802, + "epoch": 10.02, + "learning_rate": 3.042789339436116e-05, + "loss": 0.0311, + "step": 10548, + "task_loss": 0.031546495854854584 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02841011807322502, + "epoch": 10.02, + "learning_rate": 3.041749182756515e-05, + "loss": 0.0366, + "step": 10549, + "task_loss": 0.11078496277332306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03763877600431442, + "epoch": 10.02, + "learning_rate": 3.0407089276655664e-05, + "loss": 0.0385, + "step": 10550, + "task_loss": 0.04576456546783447 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01621590554714203, + "epoch": 10.02, + "learning_rate": 3.039668574352237e-05, + "loss": 0.027, + "step": 10551, + "task_loss": 0.12407008558511734 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018488582223653793, + "epoch": 10.02, + "learning_rate": 3.0386281230055113e-05, + "loss": 0.0176, + "step": 10552, + "task_loss": 0.009799566119909286 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012251723557710648, + "epoch": 10.02, + "learning_rate": 3.0375875738143938e-05, + "loss": 0.0197, + "step": 10553, + "task_loss": 0.08636859059333801 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019306058064103127, + "epoch": 10.02, + "learning_rate": 3.0365469269679042e-05, + "loss": 0.0194, + "step": 10554, + "task_loss": 0.020166553556919098 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022004347294569016, + "epoch": 10.02, + "learning_rate": 3.0355061826550813e-05, + "loss": 0.0204, + "step": 10555, + "task_loss": 0.005572935566306114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010447701439261436, + "epoch": 10.02, + "learning_rate": 3.0344653410649815e-05, + "loss": 0.0171, + "step": 10556, + "task_loss": 0.07670579850673676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023317281156778336, + "epoch": 10.03, + "learning_rate": 3.033424402386678e-05, + "loss": 0.0288, + "step": 10557, + "task_loss": 0.07775168120861053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01650930941104889, + "epoch": 10.03, + "learning_rate": 3.032383366809263e-05, + "loss": 0.0173, + "step": 10558, + "task_loss": 0.02475823275744915 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016953203827142715, + "epoch": 10.03, + "learning_rate": 3.031342234521845e-05, + "loss": 0.016, + "step": 10559, + "task_loss": 0.007815249264240265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.045376237481832504, + "epoch": 10.03, + "learning_rate": 3.030301005713552e-05, + "loss": 0.0476, + "step": 10560, + "task_loss": 0.06719513982534409 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010588767006993294, + "epoch": 10.03, + "learning_rate": 3.0292596805735274e-05, + "loss": 0.0157, + "step": 10561, + "task_loss": 0.06145908683538437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013043254613876343, + "epoch": 10.03, + "learning_rate": 3.028218259290932e-05, + "loss": 0.0121, + "step": 10562, + "task_loss": 0.003523891791701317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030626816675066948, + "epoch": 10.03, + "learning_rate": 3.0271767420549463e-05, + "loss": 0.0305, + "step": 10563, + "task_loss": 0.02899116836488247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015171288512647152, + "epoch": 10.03, + "learning_rate": 3.0261351290547667e-05, + "loss": 0.019, + "step": 10564, + "task_loss": 0.053528182208538055 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011217884719371796, + "epoch": 10.03, + "learning_rate": 3.025093420479607e-05, + "loss": 0.0113, + "step": 10565, + "task_loss": 0.012106716632843018 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.055433209985494614, + "epoch": 10.03, + "learning_rate": 3.0240516165186976e-05, + "loss": 0.055, + "step": 10566, + "task_loss": 0.05153985321521759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020901085808873177, + "epoch": 10.04, + "learning_rate": 3.0230097173612896e-05, + "loss": 0.0282, + "step": 10567, + "task_loss": 0.09422680735588074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00867534801363945, + "epoch": 10.04, + "learning_rate": 3.021967723196647e-05, + "loss": 0.0227, + "step": 10568, + "task_loss": 0.1493276208639145 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02514798566699028, + "epoch": 10.04, + "learning_rate": 3.020925634214054e-05, + "loss": 0.0274, + "step": 10569, + "task_loss": 0.04775901138782501 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010010787285864353, + "epoch": 10.04, + "learning_rate": 3.01988345060281e-05, + "loss": 0.0094, + "step": 10570, + "task_loss": 0.0035102032124996185 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03369935229420662, + "epoch": 10.04, + "learning_rate": 3.018841172552234e-05, + "loss": 0.033, + "step": 10571, + "task_loss": 0.026725368574261665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06951646506786346, + "epoch": 10.04, + "learning_rate": 3.01779880025166e-05, + "loss": 0.073, + "step": 10572, + "task_loss": 0.10399917513132095 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016049357131123543, + "epoch": 10.04, + "learning_rate": 3.0167563338904402e-05, + "loss": 0.0153, + "step": 10573, + "task_loss": 0.008852284401655197 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024919727817177773, + "epoch": 10.04, + "learning_rate": 3.0157137736579445e-05, + "loss": 0.024, + "step": 10574, + "task_loss": 0.01523817703127861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01857706718146801, + "epoch": 10.04, + "learning_rate": 3.014671119743556e-05, + "loss": 0.0173, + "step": 10575, + "task_loss": 0.0053652990609407425 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016847968101501465, + "epoch": 10.04, + "learning_rate": 3.013628372336682e-05, + "loss": 0.0155, + "step": 10576, + "task_loss": 0.0033319760113954544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018795382231473923, + "epoch": 10.04, + "learning_rate": 3.0125855316267394e-05, + "loss": 0.0242, + "step": 10577, + "task_loss": 0.07253532111644745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06431741267442703, + "epoch": 10.05, + "learning_rate": 3.0115425978031663e-05, + "loss": 0.0702, + "step": 10578, + "task_loss": 0.12317221611738205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047512613236904144, + "epoch": 10.05, + "learning_rate": 3.0104995710554174e-05, + "loss": 0.0457, + "step": 10579, + "task_loss": 0.02974691055715084 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07968870550394058, + "epoch": 10.05, + "learning_rate": 3.0094564515729623e-05, + "loss": 0.0936, + "step": 10580, + "task_loss": 0.21833162009716034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008304203860461712, + "epoch": 10.05, + "learning_rate": 3.0084132395452896e-05, + "loss": 0.0079, + "step": 10581, + "task_loss": 0.004243377596139908 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016128698363900185, + "epoch": 10.05, + "learning_rate": 3.0073699351619033e-05, + "loss": 0.0175, + "step": 10582, + "task_loss": 0.03016304410994053 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01745760627090931, + "epoch": 10.05, + "learning_rate": 3.0063265386123247e-05, + "loss": 0.0251, + "step": 10583, + "task_loss": 0.09426887333393097 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023326557129621506, + "epoch": 10.05, + "learning_rate": 3.0052830500860912e-05, + "loss": 0.0215, + "step": 10584, + "task_loss": 0.004915602505207062 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04375822842121124, + "epoch": 10.05, + "learning_rate": 3.0042394697727587e-05, + "loss": 0.0401, + "step": 10585, + "task_loss": 0.007045827805995941 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012805687263607979, + "epoch": 10.05, + "learning_rate": 3.0031957978618986e-05, + "loss": 0.012, + "step": 10586, + "task_loss": 0.005244376137852669 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01600104570388794, + "epoch": 10.05, + "learning_rate": 3.002152034543098e-05, + "loss": 0.015, + "step": 10587, + "task_loss": 0.005925571545958519 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008856108412146568, + "epoch": 10.06, + "learning_rate": 3.0011081800059616e-05, + "loss": 0.0084, + "step": 10588, + "task_loss": 0.003961298614740372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02766408771276474, + "epoch": 10.06, + "learning_rate": 3.0000642344401113e-05, + "loss": 0.0406, + "step": 10589, + "task_loss": 0.15749169886112213 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014702584594488144, + "epoch": 10.06, + "learning_rate": 2.999020198035184e-05, + "loss": 0.0137, + "step": 10590, + "task_loss": 0.004804585129022598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013725142925977707, + "epoch": 10.06, + "learning_rate": 2.997976070980836e-05, + "loss": 0.0143, + "step": 10591, + "task_loss": 0.019009843468666077 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027275511994957924, + "epoch": 10.06, + "learning_rate": 2.996931853466734e-05, + "loss": 0.0451, + "step": 10592, + "task_loss": 0.20579108595848083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009938783943653107, + "epoch": 10.06, + "learning_rate": 2.9958875456825692e-05, + "loss": 0.0093, + "step": 10593, + "task_loss": 0.003395296633243561 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00878879614174366, + "epoch": 10.06, + "learning_rate": 2.9948431478180434e-05, + "loss": 0.01, + "step": 10594, + "task_loss": 0.021191343665122986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017414798960089684, + "epoch": 10.06, + "learning_rate": 2.9937986600628758e-05, + "loss": 0.0165, + "step": 10595, + "task_loss": 0.008245648816227913 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008904863148927689, + "epoch": 10.06, + "learning_rate": 2.992754082606804e-05, + "loss": 0.0085, + "step": 10596, + "task_loss": 0.004817705601453781 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05227641388773918, + "epoch": 10.06, + "learning_rate": 2.9917094156395796e-05, + "loss": 0.0602, + "step": 10597, + "task_loss": 0.1313791424036026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016239894554018974, + "epoch": 10.06, + "learning_rate": 2.990664659350973e-05, + "loss": 0.0153, + "step": 10598, + "task_loss": 0.006773691624403 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028258735314011574, + "epoch": 10.07, + "learning_rate": 2.9896198139307668e-05, + "loss": 0.0283, + "step": 10599, + "task_loss": 0.028914695605635643 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027807846665382385, + "epoch": 10.07, + "learning_rate": 2.9885748795687642e-05, + "loss": 0.0382, + "step": 10600, + "task_loss": 0.1314897984266281 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07431833446025848, + "epoch": 10.07, + "learning_rate": 2.9875298564547805e-05, + "loss": 0.0764, + "step": 10601, + "task_loss": 0.09555675089359283 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03642863780260086, + "epoch": 10.07, + "learning_rate": 2.9864847447786503e-05, + "loss": 0.0349, + "step": 10602, + "task_loss": 0.021108128130435944 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030721113085746765, + "epoch": 10.07, + "learning_rate": 2.9854395447302246e-05, + "loss": 0.0358, + "step": 10603, + "task_loss": 0.08126336336135864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03847306966781616, + "epoch": 10.07, + "learning_rate": 2.9843942564993672e-05, + "loss": 0.0412, + "step": 10604, + "task_loss": 0.06578797101974487 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007311783730983734, + "epoch": 10.07, + "learning_rate": 2.98334888027596e-05, + "loss": 0.007, + "step": 10605, + "task_loss": 0.004128355532884598 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017765909433364868, + "epoch": 10.07, + "learning_rate": 2.9823034162499007e-05, + "loss": 0.0178, + "step": 10606, + "task_loss": 0.017818637192249298 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008372337557375431, + "epoch": 10.07, + "learning_rate": 2.981257864611104e-05, + "loss": 0.0081, + "step": 10607, + "task_loss": 0.0059828683733940125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0075715347193181515, + "epoch": 10.07, + "learning_rate": 2.980212225549498e-05, + "loss": 0.0072, + "step": 10608, + "task_loss": 0.0034196972846984863 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015017621219158173, + "epoch": 10.08, + "learning_rate": 2.9791664992550273e-05, + "loss": 0.016, + "step": 10609, + "task_loss": 0.024808872491121292 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03149663656949997, + "epoch": 10.08, + "learning_rate": 2.978120685917656e-05, + "loss": 0.0478, + "step": 10610, + "task_loss": 0.19493995606899261 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018071962520480156, + "epoch": 10.08, + "learning_rate": 2.9770747857273584e-05, + "loss": 0.0249, + "step": 10611, + "task_loss": 0.08605194836854935 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020836105570197105, + "epoch": 10.08, + "learning_rate": 2.9760287988741293e-05, + "loss": 0.0341, + "step": 10612, + "task_loss": 0.15324726700782776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.11783700436353683, + "epoch": 10.08, + "learning_rate": 2.9749827255479755e-05, + "loss": 0.1251, + "step": 10613, + "task_loss": 0.19090542197227478 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05260676145553589, + "epoch": 10.08, + "learning_rate": 2.9739365659389223e-05, + "loss": 0.0506, + "step": 10614, + "task_loss": 0.03276871144771576 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020378753542900085, + "epoch": 10.08, + "learning_rate": 2.972890320237009e-05, + "loss": 0.0242, + "step": 10615, + "task_loss": 0.05864740163087845 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027730267494916916, + "epoch": 10.08, + "learning_rate": 2.971843988632292e-05, + "loss": 0.0394, + "step": 10616, + "task_loss": 0.14460690319538116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009068669751286507, + "epoch": 10.08, + "learning_rate": 2.970797571314842e-05, + "loss": 0.0085, + "step": 10617, + "task_loss": 0.0035065151751041412 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014962357468903065, + "epoch": 10.08, + "learning_rate": 2.9697510684747454e-05, + "loss": 0.0271, + "step": 10618, + "task_loss": 0.13601535558700562 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0636109784245491, + "epoch": 10.08, + "learning_rate": 2.9687044803021057e-05, + "loss": 0.0619, + "step": 10619, + "task_loss": 0.046097345650196075 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01265124510973692, + "epoch": 10.09, + "learning_rate": 2.9676578069870392e-05, + "loss": 0.0118, + "step": 10620, + "task_loss": 0.004545770585536957 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02781161107122898, + "epoch": 10.09, + "learning_rate": 2.9666110487196798e-05, + "loss": 0.0261, + "step": 10621, + "task_loss": 0.011152038350701332 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03320011496543884, + "epoch": 10.09, + "learning_rate": 2.9655642056901762e-05, + "loss": 0.0364, + "step": 10622, + "task_loss": 0.06528480350971222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08747844398021698, + "epoch": 10.09, + "learning_rate": 2.9645172780886927e-05, + "loss": 0.092, + "step": 10623, + "task_loss": 0.13289803266525269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024324875324964523, + "epoch": 10.09, + "learning_rate": 2.9634702661054085e-05, + "loss": 0.023, + "step": 10624, + "task_loss": 0.011415783315896988 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009565697982907295, + "epoch": 10.09, + "learning_rate": 2.962423169930518e-05, + "loss": 0.0089, + "step": 10625, + "task_loss": 0.003159165382385254 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013625957071781158, + "epoch": 10.09, + "learning_rate": 2.961375989754232e-05, + "loss": 0.0128, + "step": 10626, + "task_loss": 0.0050227586179971695 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05235249549150467, + "epoch": 10.09, + "learning_rate": 2.9603287257667754e-05, + "loss": 0.0643, + "step": 10627, + "task_loss": 0.17228303849697113 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021474594250321388, + "epoch": 10.09, + "learning_rate": 2.9592813781583885e-05, + "loss": 0.02, + "step": 10628, + "task_loss": 0.007135756313800812 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027554640546441078, + "epoch": 10.09, + "learning_rate": 2.958233947119328e-05, + "loss": 0.0254, + "step": 10629, + "task_loss": 0.005915815010666847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06590898334980011, + "epoch": 10.09, + "learning_rate": 2.9571864328398636e-05, + "loss": 0.0754, + "step": 10630, + "task_loss": 0.1609448492527008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04161786288022995, + "epoch": 10.1, + "learning_rate": 2.956138835510282e-05, + "loss": 0.0464, + "step": 10631, + "task_loss": 0.08935808390378952 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01830127462744713, + "epoch": 10.1, + "learning_rate": 2.9550911553208838e-05, + "loss": 0.02, + "step": 10632, + "task_loss": 0.034862592816352844 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0464775525033474, + "epoch": 10.1, + "learning_rate": 2.954043392461986e-05, + "loss": 0.0431, + "step": 10633, + "task_loss": 0.01255410723388195 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021407488733530045, + "epoch": 10.1, + "learning_rate": 2.952995547123919e-05, + "loss": 0.0287, + "step": 10634, + "task_loss": 0.09397697448730469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013649847358465195, + "epoch": 10.1, + "learning_rate": 2.9519476194970286e-05, + "loss": 0.0128, + "step": 10635, + "task_loss": 0.005092758685350418 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03821259364485741, + "epoch": 10.1, + "learning_rate": 2.9508996097716777e-05, + "loss": 0.0355, + "step": 10636, + "task_loss": 0.011517934501171112 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0182490237057209, + "epoch": 10.1, + "learning_rate": 2.949851518138241e-05, + "loss": 0.0306, + "step": 10637, + "task_loss": 0.14180639386177063 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031840305775403976, + "epoch": 10.1, + "learning_rate": 2.948803344787109e-05, + "loss": 0.0356, + "step": 10638, + "task_loss": 0.06947970390319824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01208855863660574, + "epoch": 10.1, + "learning_rate": 2.947755089908688e-05, + "loss": 0.0287, + "step": 10639, + "task_loss": 0.17781664431095123 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01693141832947731, + "epoch": 10.1, + "learning_rate": 2.946706753693398e-05, + "loss": 0.0278, + "step": 10640, + "task_loss": 0.1261083036661148 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014154396951198578, + "epoch": 10.11, + "learning_rate": 2.945658336331676e-05, + "loss": 0.0231, + "step": 10641, + "task_loss": 0.10346969962120056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0599503219127655, + "epoch": 10.11, + "learning_rate": 2.9446098380139703e-05, + "loss": 0.0846, + "step": 10642, + "task_loss": 0.30606013536453247 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02652948535978794, + "epoch": 10.11, + "learning_rate": 2.9435612589307458e-05, + "loss": 0.0424, + "step": 10643, + "task_loss": 0.18499121069908142 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028400473296642303, + "epoch": 10.11, + "learning_rate": 2.942512599272483e-05, + "loss": 0.0444, + "step": 10644, + "task_loss": 0.18858596682548523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02752113714814186, + "epoch": 10.11, + "learning_rate": 2.9414638592296752e-05, + "loss": 0.031, + "step": 10645, + "task_loss": 0.06193367764353752 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02233045920729637, + "epoch": 10.11, + "learning_rate": 2.9404150389928316e-05, + "loss": 0.039, + "step": 10646, + "task_loss": 0.18857558071613312 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012071599252521992, + "epoch": 10.11, + "learning_rate": 2.9393661387524745e-05, + "loss": 0.0204, + "step": 10647, + "task_loss": 0.0955902636051178 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009622898884117603, + "epoch": 10.11, + "learning_rate": 2.9383171586991424e-05, + "loss": 0.0171, + "step": 10648, + "task_loss": 0.08440146595239639 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011028185486793518, + "epoch": 10.11, + "learning_rate": 2.9372680990233875e-05, + "loss": 0.0179, + "step": 10649, + "task_loss": 0.07927846908569336 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0299701951444149, + "epoch": 10.11, + "learning_rate": 2.9362189599157776e-05, + "loss": 0.038, + "step": 10650, + "task_loss": 0.10991066694259644 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01364204753190279, + "epoch": 10.11, + "learning_rate": 2.9351697415668917e-05, + "loss": 0.0286, + "step": 10651, + "task_loss": 0.16349458694458008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014143523760139942, + "epoch": 10.12, + "learning_rate": 2.9341204441673266e-05, + "loss": 0.0235, + "step": 10652, + "task_loss": 0.10781119763851166 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0211515910923481, + "epoch": 10.12, + "learning_rate": 2.9330710679076916e-05, + "loss": 0.0267, + "step": 10653, + "task_loss": 0.07670915871858597 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011677171103656292, + "epoch": 10.12, + "learning_rate": 2.9320216129786116e-05, + "loss": 0.0113, + "step": 10654, + "task_loss": 0.007633142173290253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03487570211291313, + "epoch": 10.12, + "learning_rate": 2.9309720795707257e-05, + "loss": 0.0425, + "step": 10655, + "task_loss": 0.11129167675971985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014407580718398094, + "epoch": 10.12, + "learning_rate": 2.9299224678746855e-05, + "loss": 0.0138, + "step": 10656, + "task_loss": 0.008088191971182823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014523474499583244, + "epoch": 10.12, + "learning_rate": 2.928872778081158e-05, + "loss": 0.0136, + "step": 10657, + "task_loss": 0.005704553797841072 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05787359178066254, + "epoch": 10.12, + "learning_rate": 2.9278230103808257e-05, + "loss": 0.0582, + "step": 10658, + "task_loss": 0.061048611998558044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020860906690359116, + "epoch": 10.12, + "learning_rate": 2.9267731649643827e-05, + "loss": 0.0373, + "step": 10659, + "task_loss": 0.18554821610450745 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015227505937218666, + "epoch": 10.12, + "learning_rate": 2.9257232420225394e-05, + "loss": 0.0218, + "step": 10660, + "task_loss": 0.08097986876964569 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013915725983679295, + "epoch": 10.12, + "learning_rate": 2.9246732417460178e-05, + "loss": 0.0131, + "step": 10661, + "task_loss": 0.005335215479135513 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015429834835231304, + "epoch": 10.13, + "learning_rate": 2.9236231643255578e-05, + "loss": 0.0209, + "step": 10662, + "task_loss": 0.07019021362066269 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016493186354637146, + "epoch": 10.13, + "learning_rate": 2.922573009951909e-05, + "loss": 0.0259, + "step": 10663, + "task_loss": 0.11039119958877563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016565848141908646, + "epoch": 10.13, + "learning_rate": 2.9215227788158382e-05, + "loss": 0.0183, + "step": 10664, + "task_loss": 0.03420030698180199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021225640550255775, + "epoch": 10.13, + "learning_rate": 2.920472471108125e-05, + "loss": 0.0269, + "step": 10665, + "task_loss": 0.07766622304916382 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06270953267812729, + "epoch": 10.13, + "learning_rate": 2.919422087019561e-05, + "loss": 0.0811, + "step": 10666, + "task_loss": 0.24679216742515564 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00949104130268097, + "epoch": 10.13, + "learning_rate": 2.9183716267409562e-05, + "loss": 0.0097, + "step": 10667, + "task_loss": 0.011834444478154182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016700657084584236, + "epoch": 10.13, + "learning_rate": 2.9173210904631297e-05, + "loss": 0.0192, + "step": 10668, + "task_loss": 0.04135492444038391 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019157694652676582, + "epoch": 10.13, + "learning_rate": 2.916270478376918e-05, + "loss": 0.018, + "step": 10669, + "task_loss": 0.00787227414548397 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015583924949169159, + "epoch": 10.13, + "learning_rate": 2.9152197906731687e-05, + "loss": 0.0191, + "step": 10670, + "task_loss": 0.05046524479985237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0134699996560812, + "epoch": 10.13, + "learning_rate": 2.9141690275427445e-05, + "loss": 0.0137, + "step": 10671, + "task_loss": 0.015451043844223022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023017989471554756, + "epoch": 10.13, + "learning_rate": 2.9131181891765226e-05, + "loss": 0.0218, + "step": 10672, + "task_loss": 0.011321371421217918 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.045944422483444214, + "epoch": 10.14, + "learning_rate": 2.9120672757653916e-05, + "loss": 0.0454, + "step": 10673, + "task_loss": 0.04038790985941887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05254145711660385, + "epoch": 10.14, + "learning_rate": 2.9110162875002552e-05, + "loss": 0.0481, + "step": 10674, + "task_loss": 0.0084177665412426 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028896521776914597, + "epoch": 10.14, + "learning_rate": 2.909965224572031e-05, + "loss": 0.0289, + "step": 10675, + "task_loss": 0.02889561466872692 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03066609799861908, + "epoch": 10.14, + "learning_rate": 2.9089140871716492e-05, + "loss": 0.0331, + "step": 10676, + "task_loss": 0.05538489297032356 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011451882310211658, + "epoch": 10.14, + "learning_rate": 2.9078628754900543e-05, + "loss": 0.019, + "step": 10677, + "task_loss": 0.08714932203292847 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01488979160785675, + "epoch": 10.14, + "learning_rate": 2.9068115897182036e-05, + "loss": 0.0221, + "step": 10678, + "task_loss": 0.08699451386928558 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00719775166362524, + "epoch": 10.14, + "learning_rate": 2.905760230047068e-05, + "loss": 0.0132, + "step": 10679, + "task_loss": 0.06723162531852722 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011093568056821823, + "epoch": 10.14, + "learning_rate": 2.9047087966676327e-05, + "loss": 0.0141, + "step": 10680, + "task_loss": 0.040885455906391144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03617207705974579, + "epoch": 10.14, + "learning_rate": 2.903657289770896e-05, + "loss": 0.0479, + "step": 10681, + "task_loss": 0.15319961309432983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018763557076454163, + "epoch": 10.14, + "learning_rate": 2.902605709547868e-05, + "loss": 0.0249, + "step": 10682, + "task_loss": 0.08051219582557678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016470609232783318, + "epoch": 10.15, + "learning_rate": 2.9015540561895738e-05, + "loss": 0.0278, + "step": 10683, + "task_loss": 0.12958627939224243 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10346265137195587, + "epoch": 10.15, + "learning_rate": 2.9005023298870514e-05, + "loss": 0.1048, + "step": 10684, + "task_loss": 0.11723846197128296 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.064060278236866, + "epoch": 10.15, + "learning_rate": 2.8994505308313523e-05, + "loss": 0.0662, + "step": 10685, + "task_loss": 0.08514577150344849 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02188374474644661, + "epoch": 10.15, + "learning_rate": 2.8983986592135404e-05, + "loss": 0.025, + "step": 10686, + "task_loss": 0.05310831218957901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020624695345759392, + "epoch": 10.15, + "learning_rate": 2.897346715224693e-05, + "loss": 0.0241, + "step": 10687, + "task_loss": 0.055748291313648224 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06058871001005173, + "epoch": 10.15, + "learning_rate": 2.8962946990559013e-05, + "loss": 0.0679, + "step": 10688, + "task_loss": 0.13335812091827393 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009769352152943611, + "epoch": 10.15, + "learning_rate": 2.8952426108982693e-05, + "loss": 0.0172, + "step": 10689, + "task_loss": 0.08371652662754059 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016979005187749863, + "epoch": 10.15, + "learning_rate": 2.8941904509429134e-05, + "loss": 0.0216, + "step": 10690, + "task_loss": 0.06295529007911682 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020230475813150406, + "epoch": 10.15, + "learning_rate": 2.8931382193809635e-05, + "loss": 0.0353, + "step": 10691, + "task_loss": 0.1709280014038086 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.041637878865003586, + "epoch": 10.15, + "learning_rate": 2.8920859164035625e-05, + "loss": 0.0392, + "step": 10692, + "task_loss": 0.017362238839268684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04988161846995354, + "epoch": 10.15, + "learning_rate": 2.8910335422018664e-05, + "loss": 0.048, + "step": 10693, + "task_loss": 0.031166965141892433 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021832330152392387, + "epoch": 10.16, + "learning_rate": 2.8899810969670448e-05, + "loss": 0.0259, + "step": 10694, + "task_loss": 0.06244866922497749 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.033625852316617966, + "epoch": 10.16, + "learning_rate": 2.8889285808902784e-05, + "loss": 0.0352, + "step": 10695, + "task_loss": 0.04985510930418968 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01881205290555954, + "epoch": 10.16, + "learning_rate": 2.887875994162762e-05, + "loss": 0.0177, + "step": 10696, + "task_loss": 0.007423171773552895 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031309157609939575, + "epoch": 10.16, + "learning_rate": 2.886823336975703e-05, + "loss": 0.0379, + "step": 10697, + "task_loss": 0.09693950414657593 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02167995274066925, + "epoch": 10.16, + "learning_rate": 2.885770609520323e-05, + "loss": 0.025, + "step": 10698, + "task_loss": 0.055332060903310776 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020276514813303947, + "epoch": 10.16, + "learning_rate": 2.8847178119878527e-05, + "loss": 0.0265, + "step": 10699, + "task_loss": 0.08290567249059677 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029720699414610863, + "epoch": 10.16, + "learning_rate": 2.883664944569539e-05, + "loss": 0.0503, + "step": 10700, + "task_loss": 0.2355862855911255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.037131913006305695, + "epoch": 10.16, + "learning_rate": 2.8826120074566414e-05, + "loss": 0.0397, + "step": 10701, + "task_loss": 0.06265319883823395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02705603837966919, + "epoch": 10.16, + "learning_rate": 2.8815590008404293e-05, + "loss": 0.0279, + "step": 10702, + "task_loss": 0.03594599664211273 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02101530134677887, + "epoch": 10.16, + "learning_rate": 2.8805059249121874e-05, + "loss": 0.0222, + "step": 10703, + "task_loss": 0.03242805600166321 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01382765918970108, + "epoch": 10.17, + "learning_rate": 2.8794527798632117e-05, + "loss": 0.023, + "step": 10704, + "task_loss": 0.10595635324716568 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018166912719607353, + "epoch": 10.17, + "learning_rate": 2.8783995658848105e-05, + "loss": 0.023, + "step": 10705, + "task_loss": 0.06630286574363708 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.042599309235811234, + "epoch": 10.17, + "learning_rate": 2.877346283168306e-05, + "loss": 0.0468, + "step": 10706, + "task_loss": 0.08451828360557556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039667222648859024, + "epoch": 10.17, + "learning_rate": 2.876292931905032e-05, + "loss": 0.0363, + "step": 10707, + "task_loss": 0.0058592017740011215 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.046238042414188385, + "epoch": 10.17, + "learning_rate": 2.875239512286335e-05, + "loss": 0.0441, + "step": 10708, + "task_loss": 0.024881619960069656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010262496769428253, + "epoch": 10.17, + "learning_rate": 2.8741860245035722e-05, + "loss": 0.0095, + "step": 10709, + "task_loss": 0.0028820428997278214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029114559292793274, + "epoch": 10.17, + "learning_rate": 2.8731324687481176e-05, + "loss": 0.0331, + "step": 10710, + "task_loss": 0.06866513192653656 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.047519125044345856, + "epoch": 10.17, + "learning_rate": 2.8720788452113517e-05, + "loss": 0.0558, + "step": 10711, + "task_loss": 0.13019928336143494 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03364911675453186, + "epoch": 10.17, + "learning_rate": 2.8710251540846723e-05, + "loss": 0.0376, + "step": 10712, + "task_loss": 0.07327281683683395 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017668629065155983, + "epoch": 10.17, + "learning_rate": 2.8699713955594864e-05, + "loss": 0.019, + "step": 10713, + "task_loss": 0.03053228370845318 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04549794644117355, + "epoch": 10.17, + "learning_rate": 2.8689175698272147e-05, + "loss": 0.0501, + "step": 10714, + "task_loss": 0.09190307557582855 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01674514450132847, + "epoch": 10.18, + "learning_rate": 2.8678636770792906e-05, + "loss": 0.0203, + "step": 10715, + "task_loss": 0.05194873362779617 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018080132082104683, + "epoch": 10.18, + "learning_rate": 2.8668097175071572e-05, + "loss": 0.0263, + "step": 10716, + "task_loss": 0.10066099464893341 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03006262332201004, + "epoch": 10.18, + "learning_rate": 2.865755691302272e-05, + "loss": 0.0329, + "step": 10717, + "task_loss": 0.05870826542377472 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015684347599744797, + "epoch": 10.18, + "learning_rate": 2.864701598656104e-05, + "loss": 0.0182, + "step": 10718, + "task_loss": 0.040366411209106445 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010996408760547638, + "epoch": 10.18, + "learning_rate": 2.8636474397601343e-05, + "loss": 0.0106, + "step": 10719, + "task_loss": 0.00703669898211956 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03208538889884949, + "epoch": 10.18, + "learning_rate": 2.862593214805856e-05, + "loss": 0.0318, + "step": 10720, + "task_loss": 0.029489625245332718 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04310718923807144, + "epoch": 10.18, + "learning_rate": 2.8615389239847734e-05, + "loss": 0.0414, + "step": 10721, + "task_loss": 0.025636808946728706 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.061272382736206055, + "epoch": 10.18, + "learning_rate": 2.8604845674884045e-05, + "loss": 0.0585, + "step": 10722, + "task_loss": 0.03352653980255127 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01941574551165104, + "epoch": 10.18, + "learning_rate": 2.8594301455082777e-05, + "loss": 0.0198, + "step": 10723, + "task_loss": 0.023156698793172836 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013191182166337967, + "epoch": 10.18, + "learning_rate": 2.8583756582359338e-05, + "loss": 0.0123, + "step": 10724, + "task_loss": 0.004337495192885399 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01237378641963005, + "epoch": 10.19, + "learning_rate": 2.8573211058629262e-05, + "loss": 0.0121, + "step": 10725, + "task_loss": 0.009604766964912415 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020672522485256195, + "epoch": 10.19, + "learning_rate": 2.8562664885808176e-05, + "loss": 0.0237, + "step": 10726, + "task_loss": 0.050802476704120636 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018164366483688354, + "epoch": 10.19, + "learning_rate": 2.8552118065811868e-05, + "loss": 0.0254, + "step": 10727, + "task_loss": 0.09089714288711548 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012251758947968483, + "epoch": 10.19, + "learning_rate": 2.85415706005562e-05, + "loss": 0.0149, + "step": 10728, + "task_loss": 0.03885126858949661 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021103456616401672, + "epoch": 10.19, + "learning_rate": 2.8531022491957178e-05, + "loss": 0.0238, + "step": 10729, + "task_loss": 0.048550285398960114 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021369164809584618, + "epoch": 10.19, + "learning_rate": 2.852047374193092e-05, + "loss": 0.0323, + "step": 10730, + "task_loss": 0.1303352415561676 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013989603146910667, + "epoch": 10.19, + "learning_rate": 2.850992435239364e-05, + "loss": 0.0133, + "step": 10731, + "task_loss": 0.006928419694304466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03571196272969246, + "epoch": 10.19, + "learning_rate": 2.8499374325261708e-05, + "loss": 0.0377, + "step": 10732, + "task_loss": 0.05529388412833214 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01848205178976059, + "epoch": 10.19, + "learning_rate": 2.848882366245157e-05, + "loss": 0.023, + "step": 10733, + "task_loss": 0.06377825886011124 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014538046903908253, + "epoch": 10.19, + "learning_rate": 2.847827236587982e-05, + "loss": 0.0203, + "step": 10734, + "task_loss": 0.07199336588382721 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026060445234179497, + "epoch": 10.19, + "learning_rate": 2.846772043746313e-05, + "loss": 0.0239, + "step": 10735, + "task_loss": 0.004935260862112045 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02200356312096119, + "epoch": 10.2, + "learning_rate": 2.845716787911833e-05, + "loss": 0.0247, + "step": 10736, + "task_loss": 0.04902653768658638 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.10178372263908386, + "epoch": 10.2, + "learning_rate": 2.8446614692762336e-05, + "loss": 0.0991, + "step": 10737, + "task_loss": 0.07502306997776031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03487898409366608, + "epoch": 10.2, + "learning_rate": 2.843606088031218e-05, + "loss": 0.0367, + "step": 10738, + "task_loss": 0.05333896726369858 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014870396815240383, + "epoch": 10.2, + "learning_rate": 2.842550644368502e-05, + "loss": 0.0235, + "step": 10739, + "task_loss": 0.1009284108877182 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017139773815870285, + "epoch": 10.2, + "learning_rate": 2.841495138479811e-05, + "loss": 0.0162, + "step": 10740, + "task_loss": 0.008153453469276428 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02908232994377613, + "epoch": 10.2, + "learning_rate": 2.8404395705568848e-05, + "loss": 0.0271, + "step": 10741, + "task_loss": 0.008917009457945824 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04850727319717407, + "epoch": 10.2, + "learning_rate": 2.8393839407914702e-05, + "loss": 0.0502, + "step": 10742, + "task_loss": 0.06578432023525238 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017160603776574135, + "epoch": 10.2, + "learning_rate": 2.8383282493753283e-05, + "loss": 0.0163, + "step": 10743, + "task_loss": 0.008876651525497437 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03274647891521454, + "epoch": 10.2, + "learning_rate": 2.83727249650023e-05, + "loss": 0.0303, + "step": 10744, + "task_loss": 0.008528593927621841 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023494988679885864, + "epoch": 10.2, + "learning_rate": 2.836216682357959e-05, + "loss": 0.0296, + "step": 10745, + "task_loss": 0.08411206305027008 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.045075200498104095, + "epoch": 10.21, + "learning_rate": 2.8351608071403085e-05, + "loss": 0.0411, + "step": 10746, + "task_loss": 0.005580326542258263 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07080404460430145, + "epoch": 10.21, + "learning_rate": 2.8341048710390832e-05, + "loss": 0.0653, + "step": 10747, + "task_loss": 0.016258470714092255 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02719147875905037, + "epoch": 10.21, + "learning_rate": 2.8330488742460987e-05, + "loss": 0.0254, + "step": 10748, + "task_loss": 0.009210776537656784 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03383823484182358, + "epoch": 10.21, + "learning_rate": 2.8319928169531825e-05, + "loss": 0.0556, + "step": 10749, + "task_loss": 0.2511603534221649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014685697853565216, + "epoch": 10.21, + "learning_rate": 2.830936699352172e-05, + "loss": 0.0209, + "step": 10750, + "task_loss": 0.07651330530643463 + }, + { + "epoch": 10.21, + "eval_accuracy": 0.9025229357798165, + "eval_loss": 0.4256412982940674, + "eval_runtime": 18.2731, + "eval_samples_per_second": 47.721, + "eval_steps_per_second": 5.965, + "step": 10750 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014020141214132309, + "epoch": 10.21, + "learning_rate": 2.8298805216349167e-05, + "loss": 0.021, + "step": 10751, + "task_loss": 0.08416274189949036 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03295741230249405, + "epoch": 10.21, + "learning_rate": 2.8288242839932744e-05, + "loss": 0.0485, + "step": 10752, + "task_loss": 0.18834072351455688 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013736523687839508, + "epoch": 10.21, + "learning_rate": 2.8277679866191194e-05, + "loss": 0.0133, + "step": 10753, + "task_loss": 0.009392468258738518 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05337664112448692, + "epoch": 10.21, + "learning_rate": 2.8267116297043294e-05, + "loss": 0.0524, + "step": 10754, + "task_loss": 0.0431857630610466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06362185627222061, + "epoch": 10.21, + "learning_rate": 2.8256552134407993e-05, + "loss": 0.0686, + "step": 10755, + "task_loss": 0.11329066753387451 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.050114959478378296, + "epoch": 10.21, + "learning_rate": 2.8245987380204313e-05, + "loss": 0.0566, + "step": 10756, + "task_loss": 0.11529053747653961 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03267570957541466, + "epoch": 10.22, + "learning_rate": 2.8235422036351382e-05, + "loss": 0.0373, + "step": 10757, + "task_loss": 0.07926565408706665 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013439756818115711, + "epoch": 10.22, + "learning_rate": 2.822485610476847e-05, + "loss": 0.013, + "step": 10758, + "task_loss": 0.008762186393141747 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02206001617014408, + "epoch": 10.22, + "learning_rate": 2.8214289587374908e-05, + "loss": 0.0288, + "step": 10759, + "task_loss": 0.08915533125400543 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01932983100414276, + "epoch": 10.22, + "learning_rate": 2.8203722486090168e-05, + "loss": 0.0201, + "step": 10760, + "task_loss": 0.026729634031653404 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012917459942400455, + "epoch": 10.22, + "learning_rate": 2.8193154802833803e-05, + "loss": 0.0127, + "step": 10761, + "task_loss": 0.010323688387870789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022919047623872757, + "epoch": 10.22, + "learning_rate": 2.818258653952549e-05, + "loss": 0.0217, + "step": 10762, + "task_loss": 0.01115039736032486 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01916324719786644, + "epoch": 10.22, + "learning_rate": 2.8172017698085013e-05, + "loss": 0.018, + "step": 10763, + "task_loss": 0.00737486407160759 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01026608981192112, + "epoch": 10.22, + "learning_rate": 2.816144828043224e-05, + "loss": 0.0163, + "step": 10764, + "task_loss": 0.07105138152837753 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018040932714939117, + "epoch": 10.22, + "learning_rate": 2.8150878288487155e-05, + "loss": 0.0168, + "step": 10765, + "task_loss": 0.0060977693647146225 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012006964534521103, + "epoch": 10.22, + "learning_rate": 2.8140307724169857e-05, + "loss": 0.0113, + "step": 10766, + "task_loss": 0.004458732903003693 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.036806024610996246, + "epoch": 10.23, + "learning_rate": 2.812973658940054e-05, + "loss": 0.0391, + "step": 10767, + "task_loss": 0.05974595993757248 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.039469681680202484, + "epoch": 10.23, + "learning_rate": 2.8119164886099504e-05, + "loss": 0.048, + "step": 10768, + "task_loss": 0.12487009167671204 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011058198288083076, + "epoch": 10.23, + "learning_rate": 2.8108592616187133e-05, + "loss": 0.0105, + "step": 10769, + "task_loss": 0.005702011287212372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01453112531453371, + "epoch": 10.23, + "learning_rate": 2.8098019781583944e-05, + "loss": 0.0201, + "step": 10770, + "task_loss": 0.07058731466531754 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01812460459768772, + "epoch": 10.23, + "learning_rate": 2.8087446384210547e-05, + "loss": 0.0173, + "step": 10771, + "task_loss": 0.01019604504108429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014030968770384789, + "epoch": 10.23, + "learning_rate": 2.8076872425987637e-05, + "loss": 0.0138, + "step": 10772, + "task_loss": 0.011378861963748932 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02499603107571602, + "epoch": 10.23, + "learning_rate": 2.8066297908836043e-05, + "loss": 0.0273, + "step": 10773, + "task_loss": 0.04773323982954025 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04917794466018677, + "epoch": 10.23, + "learning_rate": 2.8055722834676658e-05, + "loss": 0.0502, + "step": 10774, + "task_loss": 0.05955827608704567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04151075705885887, + "epoch": 10.23, + "learning_rate": 2.804514720543051e-05, + "loss": 0.0442, + "step": 10775, + "task_loss": 0.06856313347816467 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06127103045582771, + "epoch": 10.23, + "learning_rate": 2.80345710230187e-05, + "loss": 0.0622, + "step": 10776, + "task_loss": 0.07090768218040466 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015070604160428047, + "epoch": 10.23, + "learning_rate": 2.802399428936246e-05, + "loss": 0.0141, + "step": 10777, + "task_loss": 0.004883896559476852 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017283853143453598, + "epoch": 10.24, + "learning_rate": 2.8013417006383076e-05, + "loss": 0.0266, + "step": 10778, + "task_loss": 0.11040695756673813 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018551714718341827, + "epoch": 10.24, + "learning_rate": 2.8002839176001987e-05, + "loss": 0.0171, + "step": 10779, + "task_loss": 0.004415277391672134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02501179277896881, + "epoch": 10.24, + "learning_rate": 2.799226080014071e-05, + "loss": 0.0422, + "step": 10780, + "task_loss": 0.19670268893241882 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012673512101173401, + "epoch": 10.24, + "learning_rate": 2.7981681880720838e-05, + "loss": 0.0133, + "step": 10781, + "task_loss": 0.018554171547293663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014598320238292217, + "epoch": 10.24, + "learning_rate": 2.7971102419664103e-05, + "loss": 0.0141, + "step": 10782, + "task_loss": 0.009689368307590485 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014966873452067375, + "epoch": 10.24, + "learning_rate": 2.7960522418892288e-05, + "loss": 0.0262, + "step": 10783, + "task_loss": 0.12777450680732727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01800619065761566, + "epoch": 10.24, + "learning_rate": 2.794994188032733e-05, + "loss": 0.0173, + "step": 10784, + "task_loss": 0.011067090556025505 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01135667972266674, + "epoch": 10.24, + "learning_rate": 2.7939360805891218e-05, + "loss": 0.0107, + "step": 10785, + "task_loss": 0.004777856171131134 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025919828563928604, + "epoch": 10.24, + "learning_rate": 2.7928779197506056e-05, + "loss": 0.0289, + "step": 10786, + "task_loss": 0.05572760850191116 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016921542584896088, + "epoch": 10.24, + "learning_rate": 2.7918197057094054e-05, + "loss": 0.0157, + "step": 10787, + "task_loss": 0.004976712167263031 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025802042335271835, + "epoch": 10.25, + "learning_rate": 2.7907614386577497e-05, + "loss": 0.0428, + "step": 10788, + "task_loss": 0.19616416096687317 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008495388552546501, + "epoch": 10.25, + "learning_rate": 2.789703118787879e-05, + "loss": 0.0079, + "step": 10789, + "task_loss": 0.002873443067073822 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016510333865880966, + "epoch": 10.25, + "learning_rate": 2.7886447462920412e-05, + "loss": 0.0287, + "step": 10790, + "task_loss": 0.13880857825279236 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03891441598534584, + "epoch": 10.25, + "learning_rate": 2.787586321362495e-05, + "loss": 0.046, + "step": 10791, + "task_loss": 0.1098531037569046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012705841101706028, + "epoch": 10.25, + "learning_rate": 2.7865278441915082e-05, + "loss": 0.0149, + "step": 10792, + "task_loss": 0.034181784838438034 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01141888927668333, + "epoch": 10.25, + "learning_rate": 2.785469314971359e-05, + "loss": 0.0187, + "step": 10793, + "task_loss": 0.08427157998085022 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017081687226891518, + "epoch": 10.25, + "learning_rate": 2.7844107338943343e-05, + "loss": 0.0201, + "step": 10794, + "task_loss": 0.0467962883412838 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009645327925682068, + "epoch": 10.25, + "learning_rate": 2.7833521011527293e-05, + "loss": 0.0128, + "step": 10795, + "task_loss": 0.041607990860939026 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.032230883836746216, + "epoch": 10.25, + "learning_rate": 2.782293416938851e-05, + "loss": 0.0446, + "step": 10796, + "task_loss": 0.15550780296325684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07105334848165512, + "epoch": 10.25, + "learning_rate": 2.7812346814450135e-05, + "loss": 0.0718, + "step": 10797, + "task_loss": 0.07855713367462158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013354838825762272, + "epoch": 10.25, + "learning_rate": 2.7801758948635414e-05, + "loss": 0.0164, + "step": 10798, + "task_loss": 0.04363374784588814 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009988810867071152, + "epoch": 10.26, + "learning_rate": 2.77911705738677e-05, + "loss": 0.0175, + "step": 10799, + "task_loss": 0.08463583886623383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015538221225142479, + "epoch": 10.26, + "learning_rate": 2.7780581692070395e-05, + "loss": 0.0146, + "step": 10800, + "task_loss": 0.006343167275190353 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01280184555798769, + "epoch": 10.26, + "learning_rate": 2.7769992305167043e-05, + "loss": 0.0203, + "step": 10801, + "task_loss": 0.08731023967266083 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05490295588970184, + "epoch": 10.26, + "learning_rate": 2.775940241508124e-05, + "loss": 0.0581, + "step": 10802, + "task_loss": 0.08693551272153854 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.006106989458203316, + "epoch": 10.26, + "learning_rate": 2.774881202373671e-05, + "loss": 0.0136, + "step": 10803, + "task_loss": 0.0810597613453865 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05367905646562576, + "epoch": 10.26, + "learning_rate": 2.773822113305723e-05, + "loss": 0.0661, + "step": 10804, + "task_loss": 0.17820340394973755 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011592602357268333, + "epoch": 10.26, + "learning_rate": 2.7727629744966695e-05, + "loss": 0.0109, + "step": 10805, + "task_loss": 0.004181232303380966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016554832458496094, + "epoch": 10.26, + "learning_rate": 2.7717037861389082e-05, + "loss": 0.016, + "step": 10806, + "task_loss": 0.010724140331149101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015016058459877968, + "epoch": 10.26, + "learning_rate": 2.7706445484248454e-05, + "loss": 0.0143, + "step": 10807, + "task_loss": 0.007696835324168205 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00887630321085453, + "epoch": 10.26, + "learning_rate": 2.769585261546897e-05, + "loss": 0.013, + "step": 10808, + "task_loss": 0.05019646883010864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02640136145055294, + "epoch": 10.26, + "learning_rate": 2.768525925697487e-05, + "loss": 0.0284, + "step": 10809, + "task_loss": 0.04624557122588158 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.08202344924211502, + "epoch": 10.27, + "learning_rate": 2.76746654106905e-05, + "loss": 0.0864, + "step": 10810, + "task_loss": 0.125411257147789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051587365567684174, + "epoch": 10.27, + "learning_rate": 2.7664071078540282e-05, + "loss": 0.0557, + "step": 10811, + "task_loss": 0.0922786295413971 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01615780219435692, + "epoch": 10.27, + "learning_rate": 2.7653476262448713e-05, + "loss": 0.0155, + "step": 10812, + "task_loss": 0.009321728721261024 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06020238250494003, + "epoch": 10.27, + "learning_rate": 2.76428809643404e-05, + "loss": 0.0655, + "step": 10813, + "task_loss": 0.11301226913928986 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025619039312005043, + "epoch": 10.27, + "learning_rate": 2.763228518614004e-05, + "loss": 0.0258, + "step": 10814, + "task_loss": 0.027688482776284218 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011568024754524231, + "epoch": 10.27, + "learning_rate": 2.7621688929772393e-05, + "loss": 0.0181, + "step": 10815, + "task_loss": 0.07710108906030655 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.048748280853033066, + "epoch": 10.27, + "learning_rate": 2.761109219716233e-05, + "loss": 0.0455, + "step": 10816, + "task_loss": 0.016586463898420334 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03522196784615517, + "epoch": 10.27, + "learning_rate": 2.760049499023479e-05, + "loss": 0.0463, + "step": 10817, + "task_loss": 0.1458013653755188 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010583333671092987, + "epoch": 10.27, + "learning_rate": 2.7589897310914814e-05, + "loss": 0.0098, + "step": 10818, + "task_loss": 0.002439044415950775 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0179889015853405, + "epoch": 10.27, + "learning_rate": 2.7579299161127513e-05, + "loss": 0.0167, + "step": 10819, + "task_loss": 0.0054456982761621475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014011922292411327, + "epoch": 10.28, + "learning_rate": 2.756870054279811e-05, + "loss": 0.0139, + "step": 10820, + "task_loss": 0.01266825757920742 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020268365740776062, + "epoch": 10.28, + "learning_rate": 2.755810145785187e-05, + "loss": 0.0187, + "step": 10821, + "task_loss": 0.004288617521524429 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025478918105363846, + "epoch": 10.28, + "learning_rate": 2.754750190821418e-05, + "loss": 0.025, + "step": 10822, + "task_loss": 0.020617567002773285 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021658936515450478, + "epoch": 10.28, + "learning_rate": 2.753690189581051e-05, + "loss": 0.0199, + "step": 10823, + "task_loss": 0.004049813374876976 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06283696740865707, + "epoch": 10.28, + "learning_rate": 2.752630142256638e-05, + "loss": 0.0597, + "step": 10824, + "task_loss": 0.03175070881843567 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011753297410905361, + "epoch": 10.28, + "learning_rate": 2.7515700490407443e-05, + "loss": 0.0111, + "step": 10825, + "task_loss": 0.00544341653585434 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09492142498493195, + "epoch": 10.28, + "learning_rate": 2.7505099101259386e-05, + "loss": 0.0944, + "step": 10826, + "task_loss": 0.08998773992061615 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007941950112581253, + "epoch": 10.28, + "learning_rate": 2.749449725704802e-05, + "loss": 0.0077, + "step": 10827, + "task_loss": 0.005958493798971176 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018316073343157768, + "epoch": 10.28, + "learning_rate": 2.748389495969921e-05, + "loss": 0.0302, + "step": 10828, + "task_loss": 0.1371515393257141 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022628696635365486, + "epoch": 10.28, + "learning_rate": 2.747329221113891e-05, + "loss": 0.0268, + "step": 10829, + "task_loss": 0.06442567706108093 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012505222111940384, + "epoch": 10.28, + "learning_rate": 2.7462689013293176e-05, + "loss": 0.0118, + "step": 10830, + "task_loss": 0.005520684644579887 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02343965508043766, + "epoch": 10.29, + "learning_rate": 2.745208536808812e-05, + "loss": 0.0239, + "step": 10831, + "task_loss": 0.027552150189876556 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05450985208153725, + "epoch": 10.29, + "learning_rate": 2.7441481277449954e-05, + "loss": 0.06, + "step": 10832, + "task_loss": 0.10902714729309082 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023538943380117416, + "epoch": 10.29, + "learning_rate": 2.743087674330495e-05, + "loss": 0.0287, + "step": 10833, + "task_loss": 0.07561871409416199 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01717299409210682, + "epoch": 10.29, + "learning_rate": 2.742027176757948e-05, + "loss": 0.0172, + "step": 10834, + "task_loss": 0.01727975904941559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02774146758019924, + "epoch": 10.29, + "learning_rate": 2.7409666352199986e-05, + "loss": 0.0257, + "step": 10835, + "task_loss": 0.007454710081219673 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020189553499221802, + "epoch": 10.29, + "learning_rate": 2.7399060499092992e-05, + "loss": 0.0191, + "step": 10836, + "task_loss": 0.009731443598866463 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06831050664186478, + "epoch": 10.29, + "learning_rate": 2.7388454210185115e-05, + "loss": 0.067, + "step": 10837, + "task_loss": 0.055503882467746735 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013829356990754604, + "epoch": 10.29, + "learning_rate": 2.7377847487403018e-05, + "loss": 0.0128, + "step": 10838, + "task_loss": 0.003266597166657448 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014532563276588917, + "epoch": 10.29, + "learning_rate": 2.736724033267347e-05, + "loss": 0.0153, + "step": 10839, + "task_loss": 0.021880408748984337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05628987401723862, + "epoch": 10.29, + "learning_rate": 2.7356632747923322e-05, + "loss": 0.0626, + "step": 10840, + "task_loss": 0.11941660940647125 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.051775701344013214, + "epoch": 10.3, + "learning_rate": 2.7346024735079486e-05, + "loss": 0.0678, + "step": 10841, + "task_loss": 0.21175605058670044 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027265068143606186, + "epoch": 10.3, + "learning_rate": 2.7335416296068962e-05, + "loss": 0.0256, + "step": 10842, + "task_loss": 0.010892918333411217 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028751587495207787, + "epoch": 10.3, + "learning_rate": 2.7324807432818805e-05, + "loss": 0.0302, + "step": 10843, + "task_loss": 0.043142762035131454 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01213914155960083, + "epoch": 10.3, + "learning_rate": 2.731419814725619e-05, + "loss": 0.0111, + "step": 10844, + "task_loss": 0.002093670889735222 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021539079025387764, + "epoch": 10.3, + "learning_rate": 2.730358844130834e-05, + "loss": 0.0212, + "step": 10845, + "task_loss": 0.018369406461715698 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03298931196331978, + "epoch": 10.3, + "learning_rate": 2.729297831690255e-05, + "loss": 0.0398, + "step": 10846, + "task_loss": 0.10142332315444946 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016631023958325386, + "epoch": 10.3, + "learning_rate": 2.728236777596621e-05, + "loss": 0.017, + "step": 10847, + "task_loss": 0.020175570622086525 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011871835216879845, + "epoch": 10.3, + "learning_rate": 2.7271756820426763e-05, + "loss": 0.0125, + "step": 10848, + "task_loss": 0.017757927998900414 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0549296960234642, + "epoch": 10.3, + "learning_rate": 2.7261145452211763e-05, + "loss": 0.0602, + "step": 10849, + "task_loss": 0.10732554644346237 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020789777860045433, + "epoch": 10.3, + "learning_rate": 2.725053367324879e-05, + "loss": 0.0202, + "step": 10850, + "task_loss": 0.014408687129616737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011434385553002357, + "epoch": 10.3, + "learning_rate": 2.723992148546554e-05, + "loss": 0.0134, + "step": 10851, + "task_loss": 0.031306661665439606 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014954166486859322, + "epoch": 10.31, + "learning_rate": 2.7229308890789767e-05, + "loss": 0.0181, + "step": 10852, + "task_loss": 0.046106450259685516 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010979441925883293, + "epoch": 10.31, + "learning_rate": 2.7218695891149293e-05, + "loss": 0.0102, + "step": 10853, + "task_loss": 0.003026876598596573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015950333327054977, + "epoch": 10.31, + "learning_rate": 2.720808248847203e-05, + "loss": 0.0345, + "step": 10854, + "task_loss": 0.2015371024608612 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02772637829184532, + "epoch": 10.31, + "learning_rate": 2.719746868468595e-05, + "loss": 0.0369, + "step": 10855, + "task_loss": 0.11981463432312012 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031578004360198975, + "epoch": 10.31, + "learning_rate": 2.7186854481719092e-05, + "loss": 0.0397, + "step": 10856, + "task_loss": 0.11244003474712372 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007710220292210579, + "epoch": 10.31, + "learning_rate": 2.7176239881499595e-05, + "loss": 0.0072, + "step": 10857, + "task_loss": 0.002641640603542328 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009416783228516579, + "epoch": 10.31, + "learning_rate": 2.716562488595563e-05, + "loss": 0.0152, + "step": 10858, + "task_loss": 0.06755001842975616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06231432408094406, + "epoch": 10.31, + "learning_rate": 2.715500949701549e-05, + "loss": 0.0701, + "step": 10859, + "task_loss": 0.14002208411693573 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027409039437770844, + "epoch": 10.31, + "learning_rate": 2.7144393716607486e-05, + "loss": 0.0394, + "step": 10860, + "task_loss": 0.147533118724823 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04957464337348938, + "epoch": 10.31, + "learning_rate": 2.713377754666004e-05, + "loss": 0.0626, + "step": 10861, + "task_loss": 0.17979982495307922 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05504240095615387, + "epoch": 10.32, + "learning_rate": 2.712316098910162e-05, + "loss": 0.0618, + "step": 10862, + "task_loss": 0.12275400012731552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015080356039106846, + "epoch": 10.32, + "learning_rate": 2.711254404586079e-05, + "loss": 0.0287, + "step": 10863, + "task_loss": 0.1511533409357071 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008478467352688313, + "epoch": 10.32, + "learning_rate": 2.7101926718866156e-05, + "loss": 0.014, + "step": 10864, + "task_loss": 0.06384154409170151 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.1419282853603363, + "epoch": 10.32, + "learning_rate": 2.7091309010046408e-05, + "loss": 0.1438, + "step": 10865, + "task_loss": 0.1609189212322235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018446022644639015, + "epoch": 10.32, + "learning_rate": 2.708069092133031e-05, + "loss": 0.0171, + "step": 10866, + "task_loss": 0.005260376259684563 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0161033496260643, + "epoch": 10.32, + "learning_rate": 2.7070072454646683e-05, + "loss": 0.0208, + "step": 10867, + "task_loss": 0.06297904253005981 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06171935796737671, + "epoch": 10.32, + "learning_rate": 2.7059453611924433e-05, + "loss": 0.0676, + "step": 10868, + "task_loss": 0.12061215192079544 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.028885137289762497, + "epoch": 10.32, + "learning_rate": 2.7048834395092505e-05, + "loss": 0.0334, + "step": 10869, + "task_loss": 0.07451960444450378 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011872227303683758, + "epoch": 10.32, + "learning_rate": 2.7038214806079948e-05, + "loss": 0.0111, + "step": 10870, + "task_loss": 0.0037200450897216797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03222740814089775, + "epoch": 10.32, + "learning_rate": 2.702759484681585e-05, + "loss": 0.0405, + "step": 10871, + "task_loss": 0.11447662115097046 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013929888606071472, + "epoch": 10.32, + "learning_rate": 2.701697451922939e-05, + "loss": 0.0206, + "step": 10872, + "task_loss": 0.08043880015611649 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013857328332960606, + "epoch": 10.33, + "learning_rate": 2.7006353825249792e-05, + "loss": 0.015, + "step": 10873, + "task_loss": 0.025718068704009056 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020384974777698517, + "epoch": 10.33, + "learning_rate": 2.6995732766806354e-05, + "loss": 0.0243, + "step": 10874, + "task_loss": 0.059511590749025345 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010584133677184582, + "epoch": 10.33, + "learning_rate": 2.6985111345828452e-05, + "loss": 0.0111, + "step": 10875, + "task_loss": 0.01563730277121067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03728438913822174, + "epoch": 10.33, + "learning_rate": 2.6974489564245513e-05, + "loss": 0.038, + "step": 10876, + "task_loss": 0.044412482529878616 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018023356795310974, + "epoch": 10.33, + "learning_rate": 2.6963867423987032e-05, + "loss": 0.0171, + "step": 10877, + "task_loss": 0.008735572919249535 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024205505847930908, + "epoch": 10.33, + "learning_rate": 2.695324492698258e-05, + "loss": 0.0222, + "step": 10878, + "task_loss": 0.004576884210109711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04197827726602554, + "epoch": 10.33, + "learning_rate": 2.694262207516178e-05, + "loss": 0.0406, + "step": 10879, + "task_loss": 0.028400206938385963 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015484409406781197, + "epoch": 10.33, + "learning_rate": 2.6931998870454327e-05, + "loss": 0.0145, + "step": 10880, + "task_loss": 0.005911100655794144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014505671337246895, + "epoch": 10.33, + "learning_rate": 2.692137531478997e-05, + "loss": 0.0135, + "step": 10881, + "task_loss": 0.003982797265052795 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024028435349464417, + "epoch": 10.33, + "learning_rate": 2.6910751410098532e-05, + "loss": 0.0294, + "step": 10882, + "task_loss": 0.07786644250154495 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05532676726579666, + "epoch": 10.34, + "learning_rate": 2.6900127158309903e-05, + "loss": 0.0686, + "step": 10883, + "task_loss": 0.18820956349372864 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.00881568156182766, + "epoch": 10.34, + "learning_rate": 2.688950256135402e-05, + "loss": 0.0155, + "step": 10884, + "task_loss": 0.07542967796325684 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007144515868276358, + "epoch": 10.34, + "learning_rate": 2.6878877621160904e-05, + "loss": 0.0068, + "step": 10885, + "task_loss": 0.0034852921962738037 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03914850950241089, + "epoch": 10.34, + "learning_rate": 2.686825233966061e-05, + "loss": 0.036, + "step": 10886, + "task_loss": 0.007247054949402809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03601888567209244, + "epoch": 10.34, + "learning_rate": 2.6857626718783285e-05, + "loss": 0.0516, + "step": 10887, + "task_loss": 0.19196146726608276 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0399981364607811, + "epoch": 10.34, + "learning_rate": 2.6847000760459118e-05, + "loss": 0.0487, + "step": 10888, + "task_loss": 0.12683525681495667 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007798383943736553, + "epoch": 10.34, + "learning_rate": 2.683637446661837e-05, + "loss": 0.0138, + "step": 10889, + "task_loss": 0.06797172874212265 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04516351968050003, + "epoch": 10.34, + "learning_rate": 2.6825747839191362e-05, + "loss": 0.0543, + "step": 10890, + "task_loss": 0.13696971535682678 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05506161227822304, + "epoch": 10.34, + "learning_rate": 2.681512088010845e-05, + "loss": 0.0671, + "step": 10891, + "task_loss": 0.1754119098186493 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034937888383865356, + "epoch": 10.34, + "learning_rate": 2.6804493591300105e-05, + "loss": 0.0372, + "step": 10892, + "task_loss": 0.05717271566390991 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01608917862176895, + "epoch": 10.34, + "learning_rate": 2.6793865974696803e-05, + "loss": 0.0375, + "step": 10893, + "task_loss": 0.23010924458503723 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0070289126597344875, + "epoch": 10.35, + "learning_rate": 2.67832380322291e-05, + "loss": 0.0152, + "step": 10894, + "task_loss": 0.0884644091129303 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02088269032537937, + "epoch": 10.35, + "learning_rate": 2.6772609765827627e-05, + "loss": 0.0205, + "step": 10895, + "task_loss": 0.016729770228266716 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021521752700209618, + "epoch": 10.35, + "learning_rate": 2.6761981177423052e-05, + "loss": 0.0341, + "step": 10896, + "task_loss": 0.14689095318317413 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.030128255486488342, + "epoch": 10.35, + "learning_rate": 2.6751352268946118e-05, + "loss": 0.0355, + "step": 10897, + "task_loss": 0.08407264947891235 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01246646698564291, + "epoch": 10.35, + "learning_rate": 2.6740723042327598e-05, + "loss": 0.0171, + "step": 10898, + "task_loss": 0.0584290474653244 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01876392588019371, + "epoch": 10.35, + "learning_rate": 2.673009349949836e-05, + "loss": 0.0292, + "step": 10899, + "task_loss": 0.12266287952661514 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03964090347290039, + "epoch": 10.35, + "learning_rate": 2.6719463642389302e-05, + "loss": 0.0393, + "step": 10900, + "task_loss": 0.036599088460206985 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020488981157541275, + "epoch": 10.35, + "learning_rate": 2.6708833472931394e-05, + "loss": 0.0342, + "step": 10901, + "task_loss": 0.15787988901138306 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07471579313278198, + "epoch": 10.35, + "learning_rate": 2.669820299305566e-05, + "loss": 0.0884, + "step": 10902, + "task_loss": 0.21201379597187042 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.027081774547696114, + "epoch": 10.35, + "learning_rate": 2.6687572204693174e-05, + "loss": 0.0261, + "step": 10903, + "task_loss": 0.017036577686667442 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026169799268245697, + "epoch": 10.36, + "learning_rate": 2.667694110977506e-05, + "loss": 0.0241, + "step": 10904, + "task_loss": 0.005672993138432503 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020091958343982697, + "epoch": 10.36, + "learning_rate": 2.6666309710232522e-05, + "loss": 0.0294, + "step": 10905, + "task_loss": 0.11267304420471191 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.006835642736405134, + "epoch": 10.36, + "learning_rate": 2.6655678007996804e-05, + "loss": 0.0068, + "step": 10906, + "task_loss": 0.006012851372361183 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05445358157157898, + "epoch": 10.36, + "learning_rate": 2.66450460049992e-05, + "loss": 0.0629, + "step": 10907, + "task_loss": 0.13931649923324585 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024199290201067924, + "epoch": 10.36, + "learning_rate": 2.6634413703171058e-05, + "loss": 0.0294, + "step": 10908, + "task_loss": 0.07624688744544983 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014362799003720284, + "epoch": 10.36, + "learning_rate": 2.6623781104443806e-05, + "loss": 0.0138, + "step": 10909, + "task_loss": 0.009138602763414383 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.029530849307775497, + "epoch": 10.36, + "learning_rate": 2.6613148210748894e-05, + "loss": 0.0279, + "step": 10910, + "task_loss": 0.01313771866261959 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04332811385393143, + "epoch": 10.36, + "learning_rate": 2.6602515024017842e-05, + "loss": 0.045, + "step": 10911, + "task_loss": 0.06038268655538559 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0265837199985981, + "epoch": 10.36, + "learning_rate": 2.6591881546182216e-05, + "loss": 0.0263, + "step": 10912, + "task_loss": 0.024174809455871582 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023015733808279037, + "epoch": 10.36, + "learning_rate": 2.6581247779173635e-05, + "loss": 0.0233, + "step": 10913, + "task_loss": 0.026104921475052834 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02378995530307293, + "epoch": 10.36, + "learning_rate": 2.6570613724923788e-05, + "loss": 0.0292, + "step": 10914, + "task_loss": 0.07820422202348709 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011787974275648594, + "epoch": 10.37, + "learning_rate": 2.655997938536439e-05, + "loss": 0.0193, + "step": 10915, + "task_loss": 0.08679046481847763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023985158652067184, + "epoch": 10.37, + "learning_rate": 2.654934476242723e-05, + "loss": 0.0221, + "step": 10916, + "task_loss": 0.005446845665574074 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.025475893169641495, + "epoch": 10.37, + "learning_rate": 2.653870985804412e-05, + "loss": 0.024, + "step": 10917, + "task_loss": 0.010533835738897324 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04181814193725586, + "epoch": 10.37, + "learning_rate": 2.6528074674146963e-05, + "loss": 0.052, + "step": 10918, + "task_loss": 0.14384526014328003 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020111829042434692, + "epoch": 10.37, + "learning_rate": 2.6517439212667677e-05, + "loss": 0.0193, + "step": 10919, + "task_loss": 0.01181393675506115 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016157563775777817, + "epoch": 10.37, + "learning_rate": 2.6506803475538256e-05, + "loss": 0.0182, + "step": 10920, + "task_loss": 0.03620835393667221 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026654712855815887, + "epoch": 10.37, + "learning_rate": 2.649616746469072e-05, + "loss": 0.0281, + "step": 10921, + "task_loss": 0.04074002057313919 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014352642931044102, + "epoch": 10.37, + "learning_rate": 2.648553118205716e-05, + "loss": 0.0138, + "step": 10922, + "task_loss": 0.009018674492835999 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02536664716899395, + "epoch": 10.37, + "learning_rate": 2.6474894629569713e-05, + "loss": 0.0348, + "step": 10923, + "task_loss": 0.11997328698635101 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016859643161296844, + "epoch": 10.37, + "learning_rate": 2.6464257809160548e-05, + "loss": 0.023, + "step": 10924, + "task_loss": 0.07873199135065079 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01845327578485012, + "epoch": 10.38, + "learning_rate": 2.6453620722761896e-05, + "loss": 0.022, + "step": 10925, + "task_loss": 0.05434544384479523 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008304309099912643, + "epoch": 10.38, + "learning_rate": 2.6442983372306045e-05, + "loss": 0.0079, + "step": 10926, + "task_loss": 0.004198441281914711 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021259469911456108, + "epoch": 10.38, + "learning_rate": 2.643234575972531e-05, + "loss": 0.0265, + "step": 10927, + "task_loss": 0.07323883473873138 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008786037564277649, + "epoch": 10.38, + "learning_rate": 2.642170788695208e-05, + "loss": 0.0108, + "step": 10928, + "task_loss": 0.02885708585381508 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04073190689086914, + "epoch": 10.38, + "learning_rate": 2.6411069755918755e-05, + "loss": 0.044, + "step": 10929, + "task_loss": 0.07315443456172943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02872268483042717, + "epoch": 10.38, + "learning_rate": 2.6400431368557815e-05, + "loss": 0.035, + "step": 10930, + "task_loss": 0.09104539453983307 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007937128655612469, + "epoch": 10.38, + "learning_rate": 2.6389792726801778e-05, + "loss": 0.0074, + "step": 10931, + "task_loss": 0.0028884951025247574 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04766330122947693, + "epoch": 10.38, + "learning_rate": 2.6379153832583186e-05, + "loss": 0.0664, + "step": 10932, + "task_loss": 0.23463010787963867 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03247709199786186, + "epoch": 10.38, + "learning_rate": 2.6368514687834672e-05, + "loss": 0.0347, + "step": 10933, + "task_loss": 0.05483207106590271 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01322152093052864, + "epoch": 10.38, + "learning_rate": 2.6357875294488865e-05, + "loss": 0.0234, + "step": 10934, + "task_loss": 0.11500921845436096 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009567633271217346, + "epoch": 10.38, + "learning_rate": 2.6347235654478482e-05, + "loss": 0.0093, + "step": 10935, + "task_loss": 0.006448997184634209 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014935207553207874, + "epoch": 10.39, + "learning_rate": 2.6336595769736245e-05, + "loss": 0.0213, + "step": 10936, + "task_loss": 0.07824273407459259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011560074985027313, + "epoch": 10.39, + "learning_rate": 2.6325955642194948e-05, + "loss": 0.011, + "step": 10937, + "task_loss": 0.005575112998485565 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.031049327924847603, + "epoch": 10.39, + "learning_rate": 2.6315315273787428e-05, + "loss": 0.0339, + "step": 10938, + "task_loss": 0.059728413820266724 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.06698919832706451, + "epoch": 10.39, + "learning_rate": 2.630467466644655e-05, + "loss": 0.0754, + "step": 10939, + "task_loss": 0.15107224881649017 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014802630990743637, + "epoch": 10.39, + "learning_rate": 2.629403382210524e-05, + "loss": 0.0185, + "step": 10940, + "task_loss": 0.051325224339962006 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.008546770550310612, + "epoch": 10.39, + "learning_rate": 2.628339274269645e-05, + "loss": 0.0206, + "step": 10941, + "task_loss": 0.12929676473140717 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017933053895831108, + "epoch": 10.39, + "learning_rate": 2.6272751430153186e-05, + "loss": 0.0208, + "step": 10942, + "task_loss": 0.04631277918815613 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03135647252202034, + "epoch": 10.39, + "learning_rate": 2.62621098864085e-05, + "loss": 0.0291, + "step": 10943, + "task_loss": 0.008471904322504997 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009145300835371017, + "epoch": 10.39, + "learning_rate": 2.6251468113395465e-05, + "loss": 0.0087, + "step": 10944, + "task_loss": 0.005033126100897789 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009342268109321594, + "epoch": 10.39, + "learning_rate": 2.6240826113047235e-05, + "loss": 0.0093, + "step": 10945, + "task_loss": 0.008507819846272469 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0181414894759655, + "epoch": 10.4, + "learning_rate": 2.6230183887296955e-05, + "loss": 0.0168, + "step": 10946, + "task_loss": 0.00474889762699604 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03423862159252167, + "epoch": 10.4, + "learning_rate": 2.6219541438077855e-05, + "loss": 0.0467, + "step": 10947, + "task_loss": 0.15890000760555267 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.024610301479697227, + "epoch": 10.4, + "learning_rate": 2.620889876732317e-05, + "loss": 0.0291, + "step": 10948, + "task_loss": 0.06941775232553482 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013289782218635082, + "epoch": 10.4, + "learning_rate": 2.6198255876966204e-05, + "loss": 0.0125, + "step": 10949, + "task_loss": 0.005243049934506416 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026343289762735367, + "epoch": 10.4, + "learning_rate": 2.6187612768940293e-05, + "loss": 0.0311, + "step": 10950, + "task_loss": 0.0735698863863945 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.015885502099990845, + "epoch": 10.4, + "learning_rate": 2.61769694451788e-05, + "loss": 0.0194, + "step": 10951, + "task_loss": 0.051202740520238876 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.057844310998916626, + "epoch": 10.4, + "learning_rate": 2.616632590761514e-05, + "loss": 0.0603, + "step": 10952, + "task_loss": 0.08287277817726135 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012808309867978096, + "epoch": 10.4, + "learning_rate": 2.615568215818276e-05, + "loss": 0.0121, + "step": 10953, + "task_loss": 0.005606703460216522 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016954544931650162, + "epoch": 10.4, + "learning_rate": 2.6145038198815152e-05, + "loss": 0.0158, + "step": 10954, + "task_loss": 0.005178598687052727 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0657939538359642, + "epoch": 10.4, + "learning_rate": 2.6134394031445843e-05, + "loss": 0.0743, + "step": 10955, + "task_loss": 0.15127022564411163 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02318597212433815, + "epoch": 10.4, + "learning_rate": 2.6123749658008383e-05, + "loss": 0.0273, + "step": 10956, + "task_loss": 0.06436937302350998 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.050585560500621796, + "epoch": 10.41, + "learning_rate": 2.6113105080436396e-05, + "loss": 0.0512, + "step": 10957, + "task_loss": 0.05654662102460861 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.03527712821960449, + "epoch": 10.41, + "learning_rate": 2.6102460300663506e-05, + "loss": 0.0369, + "step": 10958, + "task_loss": 0.0516989640891552 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011321203783154488, + "epoch": 10.41, + "learning_rate": 2.60918153206234e-05, + "loss": 0.0106, + "step": 10959, + "task_loss": 0.00376252643764019 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014566227793693542, + "epoch": 10.41, + "learning_rate": 2.6081170142249773e-05, + "loss": 0.0197, + "step": 10960, + "task_loss": 0.06615670025348663 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01640435680747032, + "epoch": 10.41, + "learning_rate": 2.607052476747639e-05, + "loss": 0.0155, + "step": 10961, + "task_loss": 0.007749777287244797 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.0176126379519701, + "epoch": 10.41, + "learning_rate": 2.6059879198237026e-05, + "loss": 0.0162, + "step": 10962, + "task_loss": 0.0036756154149770737 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.034288398921489716, + "epoch": 10.41, + "learning_rate": 2.6049233436465498e-05, + "loss": 0.0426, + "step": 10963, + "task_loss": 0.1172315701842308 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.010387426242232323, + "epoch": 10.41, + "learning_rate": 2.6038587484095673e-05, + "loss": 0.0171, + "step": 10964, + "task_loss": 0.07761921733617783 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016161680221557617, + "epoch": 10.41, + "learning_rate": 2.6027941343061412e-05, + "loss": 0.024, + "step": 10965, + "task_loss": 0.0944659486413002 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.009701198898255825, + "epoch": 10.41, + "learning_rate": 2.6017295015296665e-05, + "loss": 0.0091, + "step": 10966, + "task_loss": 0.0035239197313785553 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02417180687189102, + "epoch": 10.42, + "learning_rate": 2.600664850273538e-05, + "loss": 0.0263, + "step": 10967, + "task_loss": 0.04507189989089966 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07505623996257782, + "epoch": 10.42, + "learning_rate": 2.599600180731155e-05, + "loss": 0.0926, + "step": 10968, + "task_loss": 0.2502027750015259 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.059122804552316666, + "epoch": 10.42, + "learning_rate": 2.598535493095919e-05, + "loss": 0.0613, + "step": 10969, + "task_loss": 0.08061361312866211 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01452677696943283, + "epoch": 10.42, + "learning_rate": 2.5974707875612357e-05, + "loss": 0.021, + "step": 10970, + "task_loss": 0.07877064496278763 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.019884496927261353, + "epoch": 10.42, + "learning_rate": 2.5964060643205153e-05, + "loss": 0.0246, + "step": 10971, + "task_loss": 0.06657460331916809 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013773739337921143, + "epoch": 10.42, + "learning_rate": 2.5953413235671688e-05, + "loss": 0.0197, + "step": 10972, + "task_loss": 0.07330326735973358 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021291140466928482, + "epoch": 10.42, + "learning_rate": 2.594276565494611e-05, + "loss": 0.0245, + "step": 10973, + "task_loss": 0.0534619465470314 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.038762666285037994, + "epoch": 10.42, + "learning_rate": 2.5932117902962616e-05, + "loss": 0.0361, + "step": 10974, + "task_loss": 0.012404365465044975 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.007273940369486809, + "epoch": 10.42, + "learning_rate": 2.5921469981655415e-05, + "loss": 0.007, + "step": 10975, + "task_loss": 0.004414750263094902 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.09941526502370834, + "epoch": 10.42, + "learning_rate": 2.591082189295876e-05, + "loss": 0.0983, + "step": 10976, + "task_loss": 0.08790935575962067 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013018698431551456, + "epoch": 10.42, + "learning_rate": 2.590017363880691e-05, + "loss": 0.0121, + "step": 10977, + "task_loss": 0.004249611869454384 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.014059076085686684, + "epoch": 10.43, + "learning_rate": 2.5889525221134192e-05, + "loss": 0.0204, + "step": 10978, + "task_loss": 0.07782687991857529 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013228708878159523, + "epoch": 10.43, + "learning_rate": 2.5878876641874928e-05, + "loss": 0.0125, + "step": 10979, + "task_loss": 0.005660973489284515 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.026601828634738922, + "epoch": 10.43, + "learning_rate": 2.5868227902963493e-05, + "loss": 0.0369, + "step": 10980, + "task_loss": 0.12926051020622253 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05293282866477966, + "epoch": 10.43, + "learning_rate": 2.5857579006334282e-05, + "loss": 0.0522, + "step": 10981, + "task_loss": 0.046048957854509354 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.07201385498046875, + "epoch": 10.43, + "learning_rate": 2.58469299539217e-05, + "loss": 0.0862, + "step": 10982, + "task_loss": 0.21407830715179443 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.011810416355729103, + "epoch": 10.43, + "learning_rate": 2.5836280747660225e-05, + "loss": 0.0173, + "step": 10983, + "task_loss": 0.0669318437576294 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.023423772305250168, + "epoch": 10.43, + "learning_rate": 2.5825631389484323e-05, + "loss": 0.0259, + "step": 10984, + "task_loss": 0.04826189577579498 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.022933730855584145, + "epoch": 10.43, + "learning_rate": 2.58149818813285e-05, + "loss": 0.033, + "step": 10985, + "task_loss": 0.12334097921848297 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.04635820910334587, + "epoch": 10.43, + "learning_rate": 2.5804332225127294e-05, + "loss": 0.0514, + "step": 10986, + "task_loss": 0.09712212532758713 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.012980525381863117, + "epoch": 10.43, + "learning_rate": 2.579368242281527e-05, + "loss": 0.02, + "step": 10987, + "task_loss": 0.08316182345151901 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017791984602808952, + "epoch": 10.43, + "learning_rate": 2.5783032476327007e-05, + "loss": 0.0227, + "step": 10988, + "task_loss": 0.06660401821136475 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.017145544290542603, + "epoch": 10.44, + "learning_rate": 2.5772382387597128e-05, + "loss": 0.0236, + "step": 10989, + "task_loss": 0.08195091038942337 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.018846768885850906, + "epoch": 10.44, + "learning_rate": 2.5761732158560263e-05, + "loss": 0.0216, + "step": 10990, + "task_loss": 0.04650796204805374 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.021697044372558594, + "epoch": 10.44, + "learning_rate": 2.5751081791151083e-05, + "loss": 0.0356, + "step": 10991, + "task_loss": 0.16030679643154144 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013004236854612827, + "epoch": 10.44, + "learning_rate": 2.574043128730428e-05, + "loss": 0.0135, + "step": 10992, + "task_loss": 0.017738407477736473 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.02329951897263527, + "epoch": 10.44, + "learning_rate": 2.572978064895457e-05, + "loss": 0.0298, + "step": 10993, + "task_loss": 0.08830928802490234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.020550910383462906, + "epoch": 10.44, + "learning_rate": 2.5719129878036686e-05, + "loss": 0.0193, + "step": 10994, + "task_loss": 0.007566181942820549 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05504726618528366, + "epoch": 10.44, + "learning_rate": 2.5708478976485402e-05, + "loss": 0.0543, + "step": 10995, + "task_loss": 0.04714515060186386 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05265543609857559, + "epoch": 10.44, + "learning_rate": 2.569782794623549e-05, + "loss": 0.0528, + "step": 10996, + "task_loss": 0.05369632691144943 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.01432766579091549, + "epoch": 10.44, + "learning_rate": 2.5687176789221784e-05, + "loss": 0.0136, + "step": 10997, + "task_loss": 0.0071462057530879974 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.016092218458652496, + "epoch": 10.44, + "learning_rate": 2.5676525507379097e-05, + "loss": 0.0155, + "step": 10998, + "task_loss": 0.010227423161268234 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.013000791892409325, + "epoch": 10.45, + "learning_rate": 2.566587410264229e-05, + "loss": 0.021, + "step": 10999, + "task_loss": 0.09294840693473816 + }, + { + "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, + "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, + "compression/magnitude_sparsity/target_sparsity_level": 0.8, + "compression_loss": 0.0, + "distillation_loss": 0.05334077402949333, + "epoch": 10.45, + "learning_rate": 2.565522257694625e-05, + "loss": 0.0488, + "step": 11000, + "task_loss": 0.0074908919632434845 + }, + { + "epoch": 10.45, + "eval_accuracy": 0.9128440366972477, + "eval_loss": 0.42293068766593933, + "eval_runtime": 17.8866, + "eval_samples_per_second": 48.752, + "eval_steps_per_second": 6.094, + "step": 11000 + } + ], + "max_steps": 29484, + "num_train_epochs": 28, + "total_flos": 4.62792613049088e+16, + "trial_name": null, + "trial_params": null +}