End of training

Browse files

Files changed (8) hide show

README.md +3 -2
all_results.json +12 -0
eval_results.json +7 -0
runs/Dec29_00-33-03_64-181-222-101/events.out.tfevents.1735437830.64-181-222-101.10253.1 +3 -0
train_results.json +8 -0
trainer_state.json +785 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: lora_all
@@ -15,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 # lora_all
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.7828
 ## Model description

 base_model: meta-llama/Meta-Llama-3-8B-Instruct
 tags:
 - llama-factory
+- lora
 - generated_from_trainer
 model-index:
 - name: lora_all
 # lora_all
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the healthcaremagic dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.7827
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 2.994667614646285,
+    "eval_loss": 1.782659888267517,
+    "eval_runtime": 74.5046,
+    "eval_samples_per_second": 134.22,
+    "eval_steps_per_second": 4.201,
+    "total_flos": 4.229776342129836e+18,
+    "train_loss": 1.8626822329427895,
+    "train_runtime": 5321.5336,
+    "train_samples_per_second": 50.737,
+    "train_steps_per_second": 0.198
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 2.994667614646285,
+    "eval_loss": 1.782659888267517,
+    "eval_runtime": 74.5046,
+    "eval_samples_per_second": 134.22,
+    "eval_steps_per_second": 4.201
+}

runs/Dec29_00-33-03_64-181-222-101/events.out.tfevents.1735437830.64-181-222-101.10253.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f654033f3e0ec274d7e9786a21b66774f1c7da61cacb5c975fa6ebcff7a44b39
+size 359

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.994667614646285,
+    "total_flos": 4.229776342129836e+18,
+    "train_loss": 1.8626822329427895,
+    "train_runtime": 5321.5336,
+    "train_samples_per_second": 50.737,
+    "train_steps_per_second": 0.198
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,785 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.994667614646285,
+  "eval_steps": 1000,
+  "global_step": 1053,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.028439388553146108,
+      "grad_norm": 2.7864015102386475,
+      "learning_rate": 9.433962264150944e-06,
+      "loss": 3.2455,
+      "step": 10
+    },
+    {
+      "epoch": 0.056878777106292217,
+      "grad_norm": 0.8451234698295593,
+      "learning_rate": 1.8867924528301888e-05,
+      "loss": 2.8076,
+      "step": 20
+    },
+    {
+      "epoch": 0.08531816565943832,
+      "grad_norm": 0.4598900079727173,
+      "learning_rate": 2.830188679245283e-05,
+      "loss": 2.5631,
+      "step": 30
+    },
+    {
+      "epoch": 0.11375755421258443,
+      "grad_norm": 0.3573364317417145,
+      "learning_rate": 3.7735849056603776e-05,
+      "loss": 2.4723,
+      "step": 40
+    },
+    {
+      "epoch": 0.14219694276573053,
+      "grad_norm": 0.3167368173599243,
+      "learning_rate": 4.716981132075472e-05,
+      "loss": 2.3806,
+      "step": 50
+    },
+    {
+      "epoch": 0.17063633131887665,
+      "grad_norm": 0.3523406982421875,
+      "learning_rate": 4.999395511092461e-05,
+      "loss": 2.3182,
+      "step": 60
+    },
+    {
+      "epoch": 0.19907571987202274,
+      "grad_norm": 0.3779957592487335,
+      "learning_rate": 4.996435452798774e-05,
+      "loss": 2.2473,
+      "step": 70
+    },
+    {
+      "epoch": 0.22751510842516887,
+      "grad_norm": 0.4943179786205292,
+      "learning_rate": 4.991011714111481e-05,
+      "loss": 2.199,
+      "step": 80
+    },
+    {
+      "epoch": 0.25595449697831496,
+      "grad_norm": 0.42343392968177795,
+      "learning_rate": 4.9831296476058484e-05,
+      "loss": 2.187,
+      "step": 90
+    },
+    {
+      "epoch": 0.28439388553146105,
+      "grad_norm": 0.4268713593482971,
+      "learning_rate": 4.9727970319299044e-05,
+      "loss": 2.1194,
+      "step": 100
+    },
+    {
+      "epoch": 0.3128332740846072,
+      "grad_norm": 0.48644664883613586,
+      "learning_rate": 4.9600240641278496e-05,
+      "loss": 2.1086,
+      "step": 110
+    },
+    {
+      "epoch": 0.3412726626377533,
+      "grad_norm": 0.48994767665863037,
+      "learning_rate": 4.944823349576805e-05,
+      "loss": 2.0861,
+      "step": 120
+    },
+    {
+      "epoch": 0.3697120511908994,
+      "grad_norm": 0.5868293642997742,
+      "learning_rate": 4.9272098895468277e-05,
+      "loss": 2.0809,
+      "step": 130
+    },
+    {
+      "epoch": 0.3981514397440455,
+      "grad_norm": 0.5047050714492798,
+      "learning_rate": 4.907201066396469e-05,
+      "loss": 2.0588,
+      "step": 140
+    },
+    {
+      "epoch": 0.42659082829719164,
+      "grad_norm": 0.4889000654220581,
+      "learning_rate": 4.8848166264184844e-05,
+      "loss": 2.019,
+      "step": 150
+    },
+    {
+      "epoch": 0.45503021685033773,
+      "grad_norm": 0.5262014269828796,
+      "learning_rate": 4.860078660352625e-05,
+      "loss": 2.0294,
+      "step": 160
+    },
+    {
+      "epoch": 0.4834696054034838,
+      "grad_norm": 0.4898081123828888,
+      "learning_rate": 4.8330115815847465e-05,
+      "loss": 1.9942,
+      "step": 170
+    },
+    {
+      "epoch": 0.5119089939566299,
+      "grad_norm": 0.5089215040206909,
+      "learning_rate": 4.803642102053746e-05,
+      "loss": 1.9974,
+      "step": 180
+    },
+    {
+      "epoch": 0.540348382509776,
+      "grad_norm": 0.5131760239601135,
+      "learning_rate": 4.7719992058901006e-05,
+      "loss": 1.9876,
+      "step": 190
+    },
+    {
+      "epoch": 0.5687877710629221,
+      "grad_norm": 0.4940701723098755,
+      "learning_rate": 4.7381141208120296e-05,
+      "loss": 1.9438,
+      "step": 200
+    },
+    {
+      "epoch": 0.5972271596160682,
+      "grad_norm": 0.5069682002067566,
+      "learning_rate": 4.702020287307509e-05,
+      "loss": 1.9668,
+      "step": 210
+    },
+    {
+      "epoch": 0.6256665481692144,
+      "grad_norm": 0.5188295245170593,
+      "learning_rate": 4.663753325632548e-05,
+      "loss": 1.961,
+      "step": 220
+    },
+    {
+      "epoch": 0.6541059367223605,
+      "grad_norm": 0.5184710025787354,
+      "learning_rate": 4.6233510006582914e-05,
+      "loss": 1.9327,
+      "step": 230
+    },
+    {
+      "epoch": 0.6825453252755066,
+      "grad_norm": 0.5142987370491028,
+      "learning_rate": 4.580853184601659e-05,
+      "loss": 1.9415,
+      "step": 240
+    },
+    {
+      "epoch": 0.7109847138286527,
+      "grad_norm": 0.5494813919067383,
+      "learning_rate": 4.536301817676274e-05,
+      "loss": 1.918,
+      "step": 250
+    },
+    {
+      "epoch": 0.7394241023817988,
+      "grad_norm": 0.49111875891685486,
+      "learning_rate": 4.48974086670254e-05,
+      "loss": 1.9406,
+      "step": 260
+    },
+    {
+      "epoch": 0.7678634909349449,
+      "grad_norm": 0.513344407081604,
+      "learning_rate": 4.4412162817176965e-05,
+      "loss": 1.9019,
+      "step": 270
+    },
+    {
+      "epoch": 0.796302879488091,
+      "grad_norm": 0.5305178761482239,
+      "learning_rate": 4.39077595062868e-05,
+      "loss": 1.8887,
+      "step": 280
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 0.5225845575332642,
+      "learning_rate": 4.33846965195254e-05,
+      "loss": 1.8942,
+      "step": 290
+    },
+    {
+      "epoch": 0.8531816565943833,
+      "grad_norm": 0.5039849281311035,
+      "learning_rate": 4.2843490056910534e-05,
+      "loss": 1.8961,
+      "step": 300
+    },
+    {
+      "epoch": 0.8816210451475294,
+      "grad_norm": 0.5015091896057129,
+      "learning_rate": 4.228467422388016e-05,
+      "loss": 1.9114,
+      "step": 310
+    },
+    {
+      "epoch": 0.9100604337006755,
+      "grad_norm": 0.4691685140132904,
+      "learning_rate": 4.1708800504194827e-05,
+      "loss": 1.9162,
+      "step": 320
+    },
+    {
+      "epoch": 0.9384998222538216,
+      "grad_norm": 0.48344337940216064,
+      "learning_rate": 4.1116437215689784e-05,
+      "loss": 1.8799,
+      "step": 330
+    },
+    {
+      "epoch": 0.9669392108069677,
+      "grad_norm": 0.5020110011100769,
+      "learning_rate": 4.0508168949413906e-05,
+      "loss": 1.8565,
+      "step": 340
+    },
+    {
+      "epoch": 0.9953785993601137,
+      "grad_norm": 0.514559268951416,
+      "learning_rate": 3.988459599270888e-05,
+      "loss": 1.9027,
+      "step": 350
+    },
+    {
+      "epoch": 1.0238179879132598,
+      "grad_norm": 0.5024710893630981,
+      "learning_rate": 3.9246333736798095e-05,
+      "loss": 1.9138,
+      "step": 360
+    },
+    {
+      "epoch": 1.052257376466406,
+      "grad_norm": 0.515842854976654,
+      "learning_rate": 3.859401206946982e-05,
+      "loss": 1.813,
+      "step": 370
+    },
+    {
+      "epoch": 1.080696765019552,
+      "grad_norm": 0.5406286716461182,
+      "learning_rate": 3.792827475345393e-05,
+      "loss": 1.8395,
+      "step": 380
+    },
+    {
+      "epoch": 1.1091361535726982,
+      "grad_norm": 0.5171347260475159,
+      "learning_rate": 3.724977879110591e-05,
+      "loss": 1.8314,
+      "step": 390
+    },
+    {
+      "epoch": 1.1375755421258442,
+      "grad_norm": 0.5132310390472412,
+      "learning_rate": 3.6559193776024794e-05,
+      "loss": 1.8241,
+      "step": 400
+    },
+    {
+      "epoch": 1.1660149306789904,
+      "grad_norm": 0.5178537368774414,
+      "learning_rate": 3.585720123224512e-05,
+      "loss": 1.8178,
+      "step": 410
+    },
+    {
+      "epoch": 1.1944543192321366,
+      "grad_norm": 0.5596044063568115,
+      "learning_rate": 3.5144493941655e-05,
+      "loss": 1.8174,
+      "step": 420
+    },
+    {
+      "epoch": 1.2228937077852826,
+      "grad_norm": 0.5178967714309692,
+      "learning_rate": 3.442177526030407e-05,
+      "loss": 1.7867,
+      "step": 430
+    },
+    {
+      "epoch": 1.2513330963384286,
+      "grad_norm": 0.5135601758956909,
+      "learning_rate": 3.3689758424275926e-05,
+      "loss": 1.791,
+      "step": 440
+    },
+    {
+      "epoch": 1.2797724848915748,
+      "grad_norm": 0.5257358551025391,
+      "learning_rate": 3.294916584581027e-05,
+      "loss": 1.8153,
+      "step": 450
+    },
+    {
+      "epoch": 1.308211873444721,
+      "grad_norm": 0.5272982716560364,
+      "learning_rate": 3.220072840036923e-05,
+      "loss": 1.7952,
+      "step": 460
+    },
+    {
+      "epoch": 1.336651261997867,
+      "grad_norm": 0.5305171012878418,
+      "learning_rate": 3.14451847053515e-05,
+      "loss": 1.7962,
+      "step": 470
+    },
+    {
+      "epoch": 1.3650906505510132,
+      "grad_norm": 0.5360648036003113,
+      "learning_rate": 3.068328039116616e-05,
+      "loss": 1.8002,
+      "step": 480
+    },
+    {
+      "epoch": 1.3935300391041592,
+      "grad_norm": 0.5958048105239868,
+      "learning_rate": 2.99157673653855e-05,
+      "loss": 1.8017,
+      "step": 490
+    },
+    {
+      "epoch": 1.4219694276573054,
+      "grad_norm": 0.5489828586578369,
+      "learning_rate": 2.9143403070702997e-05,
+      "loss": 1.7931,
+      "step": 500
+    },
+    {
+      "epoch": 1.4504088162104516,
+      "grad_norm": 0.5472132563591003,
+      "learning_rate": 2.8366949737428817e-05,
+      "loss": 1.8051,
+      "step": 510
+    },
+    {
+      "epoch": 1.4788482047635976,
+      "grad_norm": 0.5372362732887268,
+      "learning_rate": 2.7587173631260566e-05,
+      "loss": 1.7962,
+      "step": 520
+    },
+    {
+      "epoch": 1.5072875933167436,
+      "grad_norm": 0.5616655349731445,
+      "learning_rate": 2.6804844297071526e-05,
+      "loss": 1.7763,
+      "step": 530
+    },
+    {
+      "epoch": 1.5357269818698898,
+      "grad_norm": 0.5398069620132446,
+      "learning_rate": 2.6020733799462754e-05,
+      "loss": 1.7808,
+      "step": 540
+    },
+    {
+      "epoch": 1.564166370423036,
+      "grad_norm": 0.5471286773681641,
+      "learning_rate": 2.5235615960828605e-05,
+      "loss": 1.7836,
+      "step": 550
+    },
+    {
+      "epoch": 1.5926057589761822,
+      "grad_norm": 0.5741537809371948,
+      "learning_rate": 2.4450265597687376e-05,
+      "loss": 1.8075,
+      "step": 560
+    },
+    {
+      "epoch": 1.6210451475293282,
+      "grad_norm": 0.5587407946586609,
+      "learning_rate": 2.3665457756030988e-05,
+      "loss": 1.7669,
+      "step": 570
+    },
+    {
+      "epoch": 1.6494845360824741,
+      "grad_norm": 0.5349502563476562,
+      "learning_rate": 2.2881966946448167e-05,
+      "loss": 1.7712,
+      "step": 580
+    },
+    {
+      "epoch": 1.6779239246356203,
+      "grad_norm": 0.5548702478408813,
+      "learning_rate": 2.2100566379775967e-05,
+      "loss": 1.7669,
+      "step": 590
+    },
+    {
+      "epoch": 1.7063633131887666,
+      "grad_norm": 0.5402134656906128,
+      "learning_rate": 2.1322027204034066e-05,
+      "loss": 1.7754,
+      "step": 600
+    },
+    {
+      "epoch": 1.7348027017419125,
+      "grad_norm": 0.5542489290237427,
+      "learning_rate": 2.0547117743394744e-05,
+      "loss": 1.7959,
+      "step": 610
+    },
+    {
+      "epoch": 1.7632420902950585,
+      "grad_norm": 0.6034718751907349,
+      "learning_rate": 1.9776602739939714e-05,
+      "loss": 1.7631,
+      "step": 620
+    },
+    {
+      "epoch": 1.7916814788482047,
+      "grad_norm": 0.5417160987854004,
+      "learning_rate": 1.9011242598951962e-05,
+      "loss": 1.7681,
+      "step": 630
+    },
+    {
+      "epoch": 1.820120867401351,
+      "grad_norm": 0.5751153826713562,
+      "learning_rate": 1.8251792638487596e-05,
+      "loss": 1.7717,
+      "step": 640
+    },
+    {
+      "epoch": 1.8485602559544971,
+      "grad_norm": 0.517077624797821,
+      "learning_rate": 1.7499002343968098e-05,
+      "loss": 1.7571,
+      "step": 650
+    },
+    {
+      "epoch": 1.8769996445076431,
+      "grad_norm": 0.5594078302383423,
+      "learning_rate": 1.675361462852868e-05,
+      "loss": 1.7528,
+      "step": 660
+    },
+    {
+      "epoch": 1.905439033060789,
+      "grad_norm": 0.5302609801292419,
+      "learning_rate": 1.6016365099852735e-05,
+      "loss": 1.7454,
+      "step": 670
+    },
+    {
+      "epoch": 1.9338784216139353,
+      "grad_norm": 0.5613446235656738,
+      "learning_rate": 1.528798133421585e-05,
+      "loss": 1.7555,
+      "step": 680
+    },
+    {
+      "epoch": 1.9623178101670815,
+      "grad_norm": 0.5346989631652832,
+      "learning_rate": 1.4569182158455875e-05,
+      "loss": 1.7334,
+      "step": 690
+    },
+    {
+      "epoch": 1.9907571987202275,
+      "grad_norm": 0.5548744201660156,
+      "learning_rate": 1.3860676940577594e-05,
+      "loss": 1.7764,
+      "step": 700
+    },
+    {
+      "epoch": 2.0191965872733735,
+      "grad_norm": 0.6024349331855774,
+      "learning_rate": 1.3163164889692197e-05,
+      "loss": 1.7899,
+      "step": 710
+    },
+    {
+      "epoch": 2.0476359758265197,
+      "grad_norm": 0.583003044128418,
+      "learning_rate": 1.2477334365982248e-05,
+      "loss": 1.7026,
+      "step": 720
+    },
+    {
+      "epoch": 2.076075364379666,
+      "grad_norm": 0.572210967540741,
+      "learning_rate": 1.1803862201373342e-05,
+      "loss": 1.6817,
+      "step": 730
+    },
+    {
+      "epoch": 2.104514752932812,
+      "grad_norm": 0.567309558391571,
+      "learning_rate": 1.1143413031582645e-05,
+      "loss": 1.702,
+      "step": 740
+    },
+    {
+      "epoch": 2.132954141485958,
+      "grad_norm": 0.5702211856842041,
+      "learning_rate": 1.0496638640203774e-05,
+      "loss": 1.7001,
+      "step": 750
+    },
+    {
+      "epoch": 2.161393530039104,
+      "grad_norm": 0.598107635974884,
+      "learning_rate": 9.864177315474968e-06,
+      "loss": 1.6904,
+      "step": 760
+    },
+    {
+      "epoch": 2.1898329185922503,
+      "grad_norm": 0.5752361416816711,
+      "learning_rate": 9.246653220365778e-06,
+      "loss": 1.7187,
+      "step": 770
+    },
+    {
+      "epoch": 2.2182723071453965,
+      "grad_norm": 0.5880258679389954,
+      "learning_rate": 8.644675776603476e-06,
+      "loss": 1.6973,
+      "step": 780
+    },
+    {
+      "epoch": 2.2467116956985427,
+      "grad_norm": 0.5714329481124878,
+      "learning_rate": 8.058839063247447e-06,
+      "loss": 1.706,
+      "step": 790
+    },
+    {
+      "epoch": 2.2751510842516884,
+      "grad_norm": 0.6069587469100952,
+      "learning_rate": 7.489721230404842e-06,
+      "loss": 1.7323,
+      "step": 800
+    },
+    {
+      "epoch": 2.3035904728048346,
+      "grad_norm": 0.5789757966995239,
+      "learning_rate": 6.937883928666255e-06,
+      "loss": 1.7076,
+      "step": 810
+    },
+    {
+      "epoch": 2.332029861357981,
+      "grad_norm": 0.6063619256019592,
+      "learning_rate": 6.403871754824373e-06,
+      "loss": 1.6819,
+      "step": 820
+    },
+    {
+      "epoch": 2.360469249911127,
+      "grad_norm": 0.5955121517181396,
+      "learning_rate": 5.8882117144227115e-06,
+      "loss": 1.6991,
+      "step": 830
+    },
+    {
+      "epoch": 2.3889086384642733,
+      "grad_norm": 0.5998035073280334,
+      "learning_rate": 5.391412701664744e-06,
+      "loss": 1.6747,
+      "step": 840
+    },
+    {
+      "epoch": 2.417348027017419,
+      "grad_norm": 0.6063375473022461,
+      "learning_rate": 4.91396499719681e-06,
+      "loss": 1.7041,
+      "step": 850
+    },
+    {
+      "epoch": 2.4457874155705652,
+      "grad_norm": 0.6151806712150574,
+      "learning_rate": 4.456339784260247e-06,
+      "loss": 1.6868,
+      "step": 860
+    },
+    {
+      "epoch": 2.4742268041237114,
+      "grad_norm": 0.5840434432029724,
+      "learning_rate": 4.018988683690461e-06,
+      "loss": 1.6757,
+      "step": 870
+    },
+    {
+      "epoch": 2.502666192676857,
+      "grad_norm": 0.5928480625152588,
+      "learning_rate": 3.6023433082216755e-06,
+      "loss": 1.702,
+      "step": 880
+    },
+    {
+      "epoch": 2.5311055812300034,
+      "grad_norm": 0.5977081656455994,
+      "learning_rate": 3.2068148365372806e-06,
+      "loss": 1.7001,
+      "step": 890
+    },
+    {
+      "epoch": 2.5595449697831496,
+      "grad_norm": 0.6291081309318542,
+      "learning_rate": 2.832793607486087e-06,
+      "loss": 1.7146,
+      "step": 900
+    },
+    {
+      "epoch": 2.587984358336296,
+      "grad_norm": 0.6002483367919922,
+      "learning_rate": 2.4806487348650485e-06,
+      "loss": 1.6753,
+      "step": 910
+    },
+    {
+      "epoch": 2.616423746889442,
+      "grad_norm": 0.5828536748886108,
+      "learning_rate": 2.150727743148473e-06,
+      "loss": 1.6863,
+      "step": 920
+    },
+    {
+      "epoch": 2.6448631354425878,
+      "grad_norm": 0.6066195964813232,
+      "learning_rate": 1.8433562245233349e-06,
+      "loss": 1.658,
+      "step": 930
+    },
+    {
+      "epoch": 2.673302523995734,
+      "grad_norm": 0.6164836287498474,
+      "learning_rate": 1.5588375175691117e-06,
+      "loss": 1.6957,
+      "step": 940
+    },
+    {
+      "epoch": 2.70174191254888,
+      "grad_norm": 0.6047420501708984,
+      "learning_rate": 1.2974524078991995e-06,
+      "loss": 1.677,
+      "step": 950
+    },
+    {
+      "epoch": 2.7301813011020264,
+      "grad_norm": 0.5752055048942566,
+      "learning_rate": 1.0594588510594445e-06,
+      "loss": 1.6802,
+      "step": 960
+    },
+    {
+      "epoch": 2.7586206896551726,
+      "grad_norm": 0.5959452986717224,
+      "learning_rate": 8.450917179571305e-07,
+      "loss": 1.6897,
+      "step": 970
+    },
+    {
+      "epoch": 2.7870600782083184,
+      "grad_norm": 0.5909944176673889,
+      "learning_rate": 6.545625630717783e-07,
+      "loss": 1.6916,
+      "step": 980
+    },
+    {
+      "epoch": 2.8154994667614646,
+      "grad_norm": 0.601028323173523,
+      "learning_rate": 4.880594156763896e-07,
+      "loss": 1.7078,
+      "step": 990
+    },
+    {
+      "epoch": 2.8439388553146108,
+      "grad_norm": 0.5776922702789307,
+      "learning_rate": 3.4574659427528133e-07,
+      "loss": 1.6961,
+      "step": 1000
+    },
+    {
+      "epoch": 2.8439388553146108,
+      "eval_loss": 1.7827636003494263,
+      "eval_runtime": 74.8387,
+      "eval_samples_per_second": 133.621,
+      "eval_steps_per_second": 4.182,
+      "step": 1000
+    },
+    {
+      "epoch": 2.872378243867757,
+      "grad_norm": 0.6100145578384399,
+      "learning_rate": 2.2776454444153328e-07,
+      "loss": 1.6978,
+      "step": 1010
+    },
+    {
+      "epoch": 2.900817632420903,
+      "grad_norm": 0.6016635894775391,
+      "learning_rate": 1.342297002141918e-07,
+      "loss": 1.6623,
+      "step": 1020
+    },
+    {
+      "epoch": 2.929257020974049,
+      "grad_norm": 0.5942381620407104,
+      "learning_rate": 6.523436919190773e-08,
+      "loss": 1.6999,
+      "step": 1030
+    },
+    {
+      "epoch": 2.957696409527195,
+      "grad_norm": 0.6014872789382935,
+      "learning_rate": 2.0846641436497726e-08,
+      "loss": 1.6816,
+      "step": 1040
+    },
+    {
+      "epoch": 2.9861357980803414,
+      "grad_norm": 0.6061838269233704,
+      "learning_rate": 1.1103222762542941e-09,
+      "loss": 1.7004,
+      "step": 1050
+    },
+    {
+      "epoch": 2.994667614646285,
+      "step": 1053,
+      "total_flos": 4.229776342129836e+18,
+      "train_loss": 1.8626822329427895,
+      "train_runtime": 5321.5336,
+      "train_samples_per_second": 50.737,
+      "train_steps_per_second": 0.198
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1053,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.229776342129836e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed