{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.006976358863899959,
  "eval_steps": 13,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00013952717727799919,
      "grad_norm": 0.2509855329990387,
      "learning_rate": 1e-05,
      "loss": 1.1406,
      "step": 1
    },
    {
      "epoch": 0.00013952717727799919,
      "eval_loss": 1.3329986333847046,
      "eval_runtime": 390.6167,
      "eval_samples_per_second": 30.902,
      "eval_steps_per_second": 15.452,
      "step": 1
    },
    {
      "epoch": 0.00027905435455599837,
      "grad_norm": 0.28208601474761963,
      "learning_rate": 2e-05,
      "loss": 1.2395,
      "step": 2
    },
    {
      "epoch": 0.00041858153183399753,
      "grad_norm": 0.2632408142089844,
      "learning_rate": 3e-05,
      "loss": 1.1857,
      "step": 3
    },
    {
      "epoch": 0.0005581087091119967,
      "grad_norm": 0.26442626118659973,
      "learning_rate": 4e-05,
      "loss": 1.2206,
      "step": 4
    },
    {
      "epoch": 0.0006976358863899959,
      "grad_norm": 0.2703617513179779,
      "learning_rate": 5e-05,
      "loss": 1.2395,
      "step": 5
    },
    {
      "epoch": 0.0008371630636679951,
      "grad_norm": 0.2943132817745209,
      "learning_rate": 6e-05,
      "loss": 1.3794,
      "step": 6
    },
    {
      "epoch": 0.0009766902409459942,
      "grad_norm": 0.3263092041015625,
      "learning_rate": 7e-05,
      "loss": 1.2371,
      "step": 7
    },
    {
      "epoch": 0.0011162174182239935,
      "grad_norm": 0.28822728991508484,
      "learning_rate": 8e-05,
      "loss": 1.1173,
      "step": 8
    },
    {
      "epoch": 0.0012557445955019926,
      "grad_norm": 0.3245331048965454,
      "learning_rate": 9e-05,
      "loss": 1.114,
      "step": 9
    },
    {
      "epoch": 0.0013952717727799917,
      "grad_norm": 0.36671578884124756,
      "learning_rate": 0.0001,
      "loss": 1.3002,
      "step": 10
    },
    {
      "epoch": 0.001534798950057991,
      "grad_norm": 0.35024723410606384,
      "learning_rate": 9.98458666866564e-05,
      "loss": 1.1694,
      "step": 11
    },
    {
      "epoch": 0.0016743261273359901,
      "grad_norm": 0.3728810250759125,
      "learning_rate": 9.938441702975689e-05,
      "loss": 1.2388,
      "step": 12
    },
    {
      "epoch": 0.0018138533046139895,
      "grad_norm": 0.42505979537963867,
      "learning_rate": 9.861849601988383e-05,
      "loss": 1.1116,
      "step": 13
    },
    {
      "epoch": 0.0018138533046139895,
      "eval_loss": 1.2273173332214355,
      "eval_runtime": 293.5985,
      "eval_samples_per_second": 41.114,
      "eval_steps_per_second": 20.559,
      "step": 13
    },
    {
      "epoch": 0.0019533804818919883,
      "grad_norm": 0.37883734703063965,
      "learning_rate": 9.755282581475769e-05,
      "loss": 1.2951,
      "step": 14
    },
    {
      "epoch": 0.002092907659169988,
      "grad_norm": 0.32293638586997986,
      "learning_rate": 9.619397662556435e-05,
      "loss": 1.0587,
      "step": 15
    },
    {
      "epoch": 0.002232434836447987,
      "grad_norm": 0.3402497172355652,
      "learning_rate": 9.45503262094184e-05,
      "loss": 1.1641,
      "step": 16
    },
    {
      "epoch": 0.002371962013725986,
      "grad_norm": 0.3069184720516205,
      "learning_rate": 9.263200821770461e-05,
      "loss": 1.1247,
      "step": 17
    },
    {
      "epoch": 0.002511489191003985,
      "grad_norm": 0.2706042230129242,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.0705,
      "step": 18
    },
    {
      "epoch": 0.0026510163682819843,
      "grad_norm": 0.29615721106529236,
      "learning_rate": 8.802029828000156e-05,
      "loss": 1.1871,
      "step": 19
    },
    {
      "epoch": 0.0027905435455599834,
      "grad_norm": 0.23046638071537018,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.2344,
      "step": 20
    },
    {
      "epoch": 0.002930070722837983,
      "grad_norm": 0.2650396227836609,
      "learning_rate": 8.247240241650918e-05,
      "loss": 1.141,
      "step": 21
    },
    {
      "epoch": 0.003069597900115982,
      "grad_norm": 0.5365617275238037,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.171,
      "step": 22
    },
    {
      "epoch": 0.003209125077393981,
      "grad_norm": 0.24520841240882874,
      "learning_rate": 7.612492823579745e-05,
      "loss": 1.0765,
      "step": 23
    },
    {
      "epoch": 0.0033486522546719803,
      "grad_norm": 0.25750038027763367,
      "learning_rate": 7.269952498697734e-05,
      "loss": 1.0613,
      "step": 24
    },
    {
      "epoch": 0.0034881794319499794,
      "grad_norm": 0.2683405578136444,
      "learning_rate": 6.91341716182545e-05,
      "loss": 1.1566,
      "step": 25
    },
    {
      "epoch": 0.003627706609227979,
      "grad_norm": 0.2555634081363678,
      "learning_rate": 6.545084971874738e-05,
      "loss": 1.0815,
      "step": 26
    },
    {
      "epoch": 0.003627706609227979,
      "eval_loss": 1.1382033824920654,
      "eval_runtime": 293.8371,
      "eval_samples_per_second": 41.081,
      "eval_steps_per_second": 20.542,
      "step": 26
    },
    {
      "epoch": 0.003767233786505978,
      "grad_norm": 0.22712597250938416,
      "learning_rate": 6.167226819279528e-05,
      "loss": 1.0138,
      "step": 27
    },
    {
      "epoch": 0.003906760963783977,
      "grad_norm": 0.2388458102941513,
      "learning_rate": 5.782172325201155e-05,
      "loss": 1.1088,
      "step": 28
    },
    {
      "epoch": 0.004046288141061977,
      "grad_norm": 0.2228991836309433,
      "learning_rate": 5.392295478639225e-05,
      "loss": 1.0923,
      "step": 29
    },
    {
      "epoch": 0.004185815318339976,
      "grad_norm": 0.23074015974998474,
      "learning_rate": 5e-05,
      "loss": 1.1228,
      "step": 30
    },
    {
      "epoch": 0.004325342495617975,
      "grad_norm": 0.23124848306179047,
      "learning_rate": 4.607704521360776e-05,
      "loss": 1.1397,
      "step": 31
    },
    {
      "epoch": 0.004464869672895974,
      "grad_norm": 0.23561260104179382,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 1.1264,
      "step": 32
    },
    {
      "epoch": 0.004604396850173973,
      "grad_norm": 0.20774266123771667,
      "learning_rate": 3.832773180720475e-05,
      "loss": 1.0459,
      "step": 33
    },
    {
      "epoch": 0.004743924027451972,
      "grad_norm": 0.22472628951072693,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 1.0317,
      "step": 34
    },
    {
      "epoch": 0.004883451204729971,
      "grad_norm": 0.2344016581773758,
      "learning_rate": 3.086582838174551e-05,
      "loss": 1.0939,
      "step": 35
    },
    {
      "epoch": 0.00502297838200797,
      "grad_norm": 0.23103342950344086,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 0.9759,
      "step": 36
    },
    {
      "epoch": 0.0051625055592859695,
      "grad_norm": 0.3125031590461731,
      "learning_rate": 2.3875071764202563e-05,
      "loss": 1.1596,
      "step": 37
    },
    {
      "epoch": 0.005302032736563969,
      "grad_norm": 0.23054346442222595,
      "learning_rate": 2.061073738537635e-05,
      "loss": 0.9864,
      "step": 38
    },
    {
      "epoch": 0.005441559913841968,
      "grad_norm": 0.2276124805212021,
      "learning_rate": 1.7527597583490822e-05,
      "loss": 1.0919,
      "step": 39
    },
    {
      "epoch": 0.005441559913841968,
      "eval_loss": 1.1175899505615234,
      "eval_runtime": 294.0449,
      "eval_samples_per_second": 41.052,
      "eval_steps_per_second": 20.527,
      "step": 39
    },
    {
      "epoch": 0.005581087091119967,
      "grad_norm": 0.2677469253540039,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 1.1697,
      "step": 40
    },
    {
      "epoch": 0.005720614268397967,
      "grad_norm": 0.21872298419475555,
      "learning_rate": 1.1979701719998453e-05,
      "loss": 1.0556,
      "step": 41
    },
    {
      "epoch": 0.005860141445675966,
      "grad_norm": 0.25124436616897583,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.9909,
      "step": 42
    },
    {
      "epoch": 0.005999668622953965,
      "grad_norm": 0.23902897536754608,
      "learning_rate": 7.367991782295391e-06,
      "loss": 1.168,
      "step": 43
    },
    {
      "epoch": 0.006139195800231964,
      "grad_norm": 0.21240876615047455,
      "learning_rate": 5.449673790581611e-06,
      "loss": 1.0213,
      "step": 44
    },
    {
      "epoch": 0.006278722977509963,
      "grad_norm": 0.24014732241630554,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 1.0644,
      "step": 45
    },
    {
      "epoch": 0.006418250154787962,
      "grad_norm": 0.21955840289592743,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 0.952,
      "step": 46
    },
    {
      "epoch": 0.006557777332065961,
      "grad_norm": 0.21186882257461548,
      "learning_rate": 1.3815039801161721e-06,
      "loss": 0.9789,
      "step": 47
    },
    {
      "epoch": 0.0066973045093439605,
      "grad_norm": 0.26049795746803284,
      "learning_rate": 6.15582970243117e-07,
      "loss": 0.993,
      "step": 48
    },
    {
      "epoch": 0.00683683168662196,
      "grad_norm": 0.22005507349967957,
      "learning_rate": 1.5413331334360182e-07,
      "loss": 0.9553,
      "step": 49
    },
    {
      "epoch": 0.006976358863899959,
      "grad_norm": 0.21095995604991913,
      "learning_rate": 0.0,
      "loss": 1.0059,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.69912849629184e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}