tyzhu committed on
Commit
8881ee1
·
verified ·
1 Parent(s): 41cdd0e

End of training

Browse files
Files changed (6) hide show
  1. README.md +14 -2
  2. all_results.json +12 -12
  3. eval_results.json +7 -7
  4. tokenizer.json +1 -6
  5. train_results.json +6 -6
  6. trainer_state.json +340 -12
README.md CHANGED
@@ -3,11 +3,23 @@ license: other
3
  base_model: Qwen/Qwen1.5-4B
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: lmind_nq_train6000_eval6489_v1_docidx_v3_Qwen_Qwen1.5-4B_lora2
10
- results: []
 
 
 
 
 
 
 
 
 
 
11
  library_name: peft
12
  ---
13
 
@@ -16,7 +28,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # lmind_nq_train6000_eval6489_v1_docidx_v3_Qwen_Qwen1.5-4B_lora2
18
 
19
- This model is a fine-tuned version of [Qwen/Qwen1.5-4B](https://huggingface.co/Qwen/Qwen1.5-4B) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 5.3392
22
  - Accuracy: 0.4286
 
3
  base_model: Qwen/Qwen1.5-4B
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3
8
  metrics:
9
  - accuracy
10
  model-index:
11
  - name: lmind_nq_train6000_eval6489_v1_docidx_v3_Qwen_Qwen1.5-4B_lora2
12
+ results:
13
+ - task:
14
+ name: Causal Language Modeling
15
+ type: text-generation
16
+ dataset:
17
+ name: tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3
18
+ type: tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3
19
+ metrics:
20
+ - name: Accuracy
21
+ type: accuracy
22
+ value: 0.4286153846153846
23
  library_name: peft
24
  ---
25
 
 
28
 
29
  # lmind_nq_train6000_eval6489_v1_docidx_v3_Qwen_Qwen1.5-4B_lora2
30
 
31
+ This model is a fine-tuned version of [Qwen/Qwen1.5-4B](https://huggingface.co/Qwen/Qwen1.5-4B) on the tyzhu/lmind_nq_train6000_eval6489_v1_docidx_v3 dataset.
32
  It achieves the following results on the evaluation set:
33
  - Loss: 5.3392
34
  - Accuracy: 0.4286
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 9.985358711566619,
3
- "eval_accuracy": 0.44876923076923075,
4
- "eval_loss": 4.579395294189453,
5
- "eval_runtime": 5.7694,
6
  "eval_samples": 500,
7
- "eval_samples_per_second": 86.664,
8
- "eval_steps_per_second": 10.92,
9
- "perplexity": 97.45544451167304,
10
- "total_flos": 2.928245903951135e+17,
11
- "train_loss": 1.2971285638221897,
12
- "train_runtime": 7961.3414,
13
  "train_samples": 10925,
14
- "train_samples_per_second": 13.723,
15
- "train_steps_per_second": 0.428
16
  }
 
1
  {
2
+ "epoch": 19.98535871156662,
3
+ "eval_accuracy": 0.4286153846153846,
4
+ "eval_loss": 5.339197635650635,
5
+ "eval_runtime": 7.9613,
6
  "eval_samples": 500,
7
+ "eval_samples_per_second": 62.804,
8
+ "eval_steps_per_second": 7.913,
9
+ "perplexity": 208.34547422507072,
10
+ "total_flos": 5.856533154721956e+17,
11
+ "train_loss": 0.12585465450091096,
12
+ "train_runtime": 12067.3128,
13
  "train_samples": 10925,
14
+ "train_samples_per_second": 18.107,
15
+ "train_steps_per_second": 0.565
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 9.985358711566619,
3
- "eval_accuracy": 0.44876923076923075,
4
- "eval_loss": 4.579395294189453,
5
- "eval_runtime": 5.7694,
6
  "eval_samples": 500,
7
- "eval_samples_per_second": 86.664,
8
- "eval_steps_per_second": 10.92,
9
- "perplexity": 97.45544451167304
10
  }
 
1
  {
2
+ "epoch": 19.98535871156662,
3
+ "eval_accuracy": 0.4286153846153846,
4
+ "eval_loss": 5.339197635650635,
5
+ "eval_runtime": 7.9613,
6
  "eval_samples": 500,
7
+ "eval_samples_per_second": 62.804,
8
+ "eval_steps_per_second": 7.913,
9
+ "perplexity": 208.34547422507072
10
  }
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 1024,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.985358711566619,
3
- "total_flos": 2.928245903951135e+17,
4
- "train_loss": 1.2971285638221897,
5
- "train_runtime": 7961.3414,
6
  "train_samples": 10925,
7
- "train_samples_per_second": 13.723,
8
- "train_steps_per_second": 0.428
9
  }
 
1
  {
2
+ "epoch": 19.98535871156662,
3
+ "total_flos": 5.856533154721956e+17,
4
+ "train_loss": 0.12585465450091096,
5
+ "train_runtime": 12067.3128,
6
  "train_samples": 10925,
7
+ "train_samples_per_second": 18.107,
8
+ "train_steps_per_second": 0.565
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.985358711566619,
5
  "eval_steps": 500,
6
- "global_step": 3410,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -337,21 +337,349 @@
337
  "step": 3410
338
  },
339
  {
340
- "epoch": 9.985358711566619,
341
- "step": 3410,
342
- "total_flos": 2.928245903951135e+17,
343
- "train_loss": 1.2971285638221897,
344
- "train_runtime": 7961.3414,
345
- "train_samples_per_second": 13.723,
346
- "train_steps_per_second": 0.428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }
348
  ],
349
  "logging_steps": 100,
350
- "max_steps": 3410,
351
  "num_input_tokens_seen": 0,
352
- "num_train_epochs": 10,
353
  "save_steps": 500,
354
- "total_flos": 2.928245903951135e+17,
355
  "train_batch_size": 1,
356
  "trial_name": null,
357
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.98535871156662,
5
  "eval_steps": 500,
6
+ "global_step": 6820,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
337
  "step": 3410
338
  },
339
  {
340
+ "epoch": 10.263543191800878,
341
+ "grad_norm": 1.5488256216049194,
342
+ "learning_rate": 0.0001,
343
+ "loss": 0.4245,
344
+ "step": 3500
345
+ },
346
+ {
347
+ "epoch": 10.556368960468522,
348
+ "grad_norm": 1.543874740600586,
349
+ "learning_rate": 0.0001,
350
+ "loss": 0.4474,
351
+ "step": 3600
352
+ },
353
+ {
354
+ "epoch": 10.849194729136164,
355
+ "grad_norm": 1.6338953971862793,
356
+ "learning_rate": 0.0001,
357
+ "loss": 0.4641,
358
+ "step": 3700
359
+ },
360
+ {
361
+ "epoch": 10.998535871156662,
362
+ "eval_accuracy": 0.4494871794871795,
363
+ "eval_loss": 4.709042549133301,
364
+ "eval_runtime": 6.1807,
365
+ "eval_samples_per_second": 80.898,
366
+ "eval_steps_per_second": 10.193,
367
+ "step": 3751
368
+ },
369
+ {
370
+ "epoch": 11.142020497803808,
371
+ "grad_norm": 1.4190610647201538,
372
+ "learning_rate": 0.0001,
373
+ "loss": 0.4121,
374
+ "step": 3800
375
+ },
376
+ {
377
+ "epoch": 11.43484626647145,
378
+ "grad_norm": 1.4251505136489868,
379
+ "learning_rate": 0.0001,
380
+ "loss": 0.3584,
381
+ "step": 3900
382
+ },
383
+ {
384
+ "epoch": 11.727672035139092,
385
+ "grad_norm": 1.4489820003509521,
386
+ "learning_rate": 0.0001,
387
+ "loss": 0.3755,
388
+ "step": 4000
389
+ },
390
+ {
391
+ "epoch": 12.0,
392
+ "eval_accuracy": 0.43543589743589745,
393
+ "eval_loss": 4.94539213180542,
394
+ "eval_runtime": 7.2389,
395
+ "eval_samples_per_second": 69.071,
396
+ "eval_steps_per_second": 8.703,
397
+ "step": 4093
398
+ },
399
+ {
400
+ "epoch": 12.020497803806736,
401
+ "grad_norm": 1.3672994375228882,
402
+ "learning_rate": 0.0001,
403
+ "loss": 0.3818,
404
+ "step": 4100
405
+ },
406
+ {
407
+ "epoch": 12.313323572474378,
408
+ "grad_norm": 1.3448461294174194,
409
+ "learning_rate": 0.0001,
410
+ "loss": 0.2861,
411
+ "step": 4200
412
+ },
413
+ {
414
+ "epoch": 12.60614934114202,
415
+ "grad_norm": 1.545249581336975,
416
+ "learning_rate": 0.0001,
417
+ "loss": 0.3069,
418
+ "step": 4300
419
+ },
420
+ {
421
+ "epoch": 12.898975109809664,
422
+ "grad_norm": 1.706429362297058,
423
+ "learning_rate": 0.0001,
424
+ "loss": 0.3235,
425
+ "step": 4400
426
+ },
427
+ {
428
+ "epoch": 12.998535871156662,
429
+ "eval_accuracy": 0.4379487179487179,
430
+ "eval_loss": 5.062421798706055,
431
+ "eval_runtime": 8.0039,
432
+ "eval_samples_per_second": 62.47,
433
+ "eval_steps_per_second": 7.871,
434
+ "step": 4434
435
+ },
436
+ {
437
+ "epoch": 13.191800878477306,
438
+ "grad_norm": 1.3922677040100098,
439
+ "learning_rate": 0.0001,
440
+ "loss": 0.2675,
441
+ "step": 4500
442
+ },
443
+ {
444
+ "epoch": 13.48462664714495,
445
+ "grad_norm": 1.5473686456680298,
446
+ "learning_rate": 0.0001,
447
+ "loss": 0.2568,
448
+ "step": 4600
449
+ },
450
+ {
451
+ "epoch": 13.777452415812592,
452
+ "grad_norm": 1.5473802089691162,
453
+ "learning_rate": 0.0001,
454
+ "loss": 0.2691,
455
+ "step": 4700
456
+ },
457
+ {
458
+ "epoch": 14.0,
459
+ "eval_accuracy": 0.43446153846153845,
460
+ "eval_loss": 5.095705032348633,
461
+ "eval_runtime": 8.6359,
462
+ "eval_samples_per_second": 57.898,
463
+ "eval_steps_per_second": 7.295,
464
+ "step": 4776
465
+ },
466
+ {
467
+ "epoch": 14.070278184480234,
468
+ "grad_norm": 1.3755005598068237,
469
+ "learning_rate": 0.0001,
470
+ "loss": 0.2602,
471
+ "step": 4800
472
+ },
473
+ {
474
+ "epoch": 14.363103953147878,
475
+ "grad_norm": 1.5911964178085327,
476
+ "learning_rate": 0.0001,
477
+ "loss": 0.2195,
478
+ "step": 4900
479
+ },
480
+ {
481
+ "epoch": 14.65592972181552,
482
+ "grad_norm": 1.5082800388336182,
483
+ "learning_rate": 0.0001,
484
+ "loss": 0.2298,
485
+ "step": 5000
486
+ },
487
+ {
488
+ "epoch": 14.948755490483162,
489
+ "grad_norm": 1.9005481004714966,
490
+ "learning_rate": 0.0001,
491
+ "loss": 0.2394,
492
+ "step": 5100
493
+ },
494
+ {
495
+ "epoch": 14.998535871156662,
496
+ "eval_accuracy": 0.43676923076923074,
497
+ "eval_loss": 5.183106899261475,
498
+ "eval_runtime": 7.9023,
499
+ "eval_samples_per_second": 63.272,
500
+ "eval_steps_per_second": 7.972,
501
+ "step": 5117
502
+ },
503
+ {
504
+ "epoch": 15.241581259150806,
505
+ "grad_norm": 1.6176873445510864,
506
+ "learning_rate": 0.0001,
507
+ "loss": 0.1998,
508
+ "step": 5200
509
+ },
510
+ {
511
+ "epoch": 15.534407027818448,
512
+ "grad_norm": 1.5397286415100098,
513
+ "learning_rate": 0.0001,
514
+ "loss": 0.203,
515
+ "step": 5300
516
+ },
517
+ {
518
+ "epoch": 15.82723279648609,
519
+ "grad_norm": 1.572167992591858,
520
+ "learning_rate": 0.0001,
521
+ "loss": 0.2112,
522
+ "step": 5400
523
+ },
524
+ {
525
+ "epoch": 16.0,
526
+ "eval_accuracy": 0.4326153846153846,
527
+ "eval_loss": 5.322297096252441,
528
+ "eval_runtime": 6.2388,
529
+ "eval_samples_per_second": 80.144,
530
+ "eval_steps_per_second": 10.098,
531
+ "step": 5459
532
+ },
533
+ {
534
+ "epoch": 16.120058565153734,
535
+ "grad_norm": 1.1185795068740845,
536
+ "learning_rate": 0.0001,
537
+ "loss": 0.1997,
538
+ "step": 5500
539
+ },
540
+ {
541
+ "epoch": 16.412884333821378,
542
+ "grad_norm": 1.3208354711532593,
543
+ "learning_rate": 0.0001,
544
+ "loss": 0.1826,
545
+ "step": 5600
546
+ },
547
+ {
548
+ "epoch": 16.705710102489018,
549
+ "grad_norm": 1.262676477432251,
550
+ "learning_rate": 0.0001,
551
+ "loss": 0.1928,
552
+ "step": 5700
553
+ },
554
+ {
555
+ "epoch": 16.998535871156662,
556
+ "grad_norm": 1.3489364385604858,
557
+ "learning_rate": 0.0001,
558
+ "loss": 0.1994,
559
+ "step": 5800
560
+ },
561
+ {
562
+ "epoch": 16.998535871156662,
563
+ "eval_accuracy": 0.4301025641025641,
564
+ "eval_loss": 5.383902072906494,
565
+ "eval_runtime": 6.4812,
566
+ "eval_samples_per_second": 77.146,
567
+ "eval_steps_per_second": 9.72,
568
+ "step": 5800
569
+ },
570
+ {
571
+ "epoch": 17.291361639824306,
572
+ "grad_norm": 1.4268001317977905,
573
+ "learning_rate": 0.0001,
574
+ "loss": 0.1674,
575
+ "step": 5900
576
+ },
577
+ {
578
+ "epoch": 17.584187408491946,
579
+ "grad_norm": 1.3584396839141846,
580
+ "learning_rate": 0.0001,
581
+ "loss": 0.1761,
582
+ "step": 6000
583
+ },
584
+ {
585
+ "epoch": 17.87701317715959,
586
+ "grad_norm": 1.5572041273117065,
587
+ "learning_rate": 0.0001,
588
+ "loss": 0.1834,
589
+ "step": 6100
590
+ },
591
+ {
592
+ "epoch": 18.0,
593
+ "eval_accuracy": 0.42856410256410254,
594
+ "eval_loss": 5.423606872558594,
595
+ "eval_runtime": 6.3593,
596
+ "eval_samples_per_second": 78.625,
597
+ "eval_steps_per_second": 9.907,
598
+ "step": 6142
599
+ },
600
+ {
601
+ "epoch": 18.169838945827234,
602
+ "grad_norm": 1.573486328125,
603
+ "learning_rate": 0.0001,
604
+ "loss": 0.1687,
605
+ "step": 6200
606
+ },
607
+ {
608
+ "epoch": 18.462664714494874,
609
+ "grad_norm": 1.2936781644821167,
610
+ "learning_rate": 0.0001,
611
+ "loss": 0.1631,
612
+ "step": 6300
613
+ },
614
+ {
615
+ "epoch": 18.755490483162518,
616
+ "grad_norm": 1.4259896278381348,
617
+ "learning_rate": 0.0001,
618
+ "loss": 0.1709,
619
+ "step": 6400
620
+ },
621
+ {
622
+ "epoch": 18.998535871156662,
623
+ "eval_accuracy": 0.42912820512820515,
624
+ "eval_loss": 5.484007835388184,
625
+ "eval_runtime": 7.2298,
626
+ "eval_samples_per_second": 69.158,
627
+ "eval_steps_per_second": 8.714,
628
+ "step": 6483
629
+ },
630
+ {
631
+ "epoch": 19.048316251830162,
632
+ "grad_norm": 0.8712400197982788,
633
+ "learning_rate": 0.0001,
634
+ "loss": 0.1722,
635
+ "step": 6500
636
+ },
637
+ {
638
+ "epoch": 19.341142020497802,
639
+ "grad_norm": 1.1428442001342773,
640
+ "learning_rate": 0.0001,
641
+ "loss": 0.1513,
642
+ "step": 6600
643
+ },
644
+ {
645
+ "epoch": 19.633967789165446,
646
+ "grad_norm": 1.1861703395843506,
647
+ "learning_rate": 0.0001,
648
+ "loss": 0.161,
649
+ "step": 6700
650
+ },
651
+ {
652
+ "epoch": 19.92679355783309,
653
+ "grad_norm": 1.1480906009674072,
654
+ "learning_rate": 0.0001,
655
+ "loss": 0.166,
656
+ "step": 6800
657
+ },
658
+ {
659
+ "epoch": 19.98535871156662,
660
+ "eval_accuracy": 0.4286153846153846,
661
+ "eval_loss": 5.339197635650635,
662
+ "eval_runtime": 7.1865,
663
+ "eval_samples_per_second": 69.575,
664
+ "eval_steps_per_second": 8.766,
665
+ "step": 6820
666
+ },
667
+ {
668
+ "epoch": 19.98535871156662,
669
+ "step": 6820,
670
+ "total_flos": 5.856533154721956e+17,
671
+ "train_loss": 0.12585465450091096,
672
+ "train_runtime": 12067.3128,
673
+ "train_samples_per_second": 18.107,
674
+ "train_steps_per_second": 0.565
675
  }
676
  ],
677
  "logging_steps": 100,
678
+ "max_steps": 6820,
679
  "num_input_tokens_seen": 0,
680
+ "num_train_epochs": 20,
681
  "save_steps": 500,
682
+ "total_flos": 5.856533154721956e+17,
683
  "train_batch_size": 1,
684
  "trial_name": null,
685
  "trial_params": null