File size: 1,751 Bytes
eacae03
 
 
8937053
 
eacae03
 
 
 
083e767
 
8937053
 
083e767
 
 
 
8937053
 
083e767
 
 
 
8937053
 
083e767
 
 
 
8937053
 
083e767
 
eacae03
 
8937053
 
eacae03
 
083e767
 
8937053
 
083e767
 
 
 
8937053
 
083e767
 
 
 
8937053
 
083e767
 
 
 
8937053
 
083e767
 
eacae03
 
8937053
 
eacae03
 
083e767
8937053
 
 
 
 
 
 
eacae03
 
8937053
 
 
eacae03
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 18.51851851851852,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.85,
      "learning_rate": 5.401e-05,
      "loss": 2.5377,
      "step": 100
    },
    {
      "epoch": 3.7,
      "learning_rate": 4.8020000000000004e-05,
      "loss": 1.7685,
      "step": 200
    },
    {
      "epoch": 5.56,
      "learning_rate": 4.203e-05,
      "loss": 1.4614,
      "step": 300
    },
    {
      "epoch": 7.41,
      "learning_rate": 3.604e-05,
      "loss": 1.282,
      "step": 400
    },
    {
      "epoch": 9.26,
      "learning_rate": 3.0050000000000002e-05,
      "loss": 1.157,
      "step": 500
    },
    {
      "epoch": 11.11,
      "learning_rate": 2.406e-05,
      "loss": 1.0986,
      "step": 600
    },
    {
      "epoch": 12.96,
      "learning_rate": 1.807e-05,
      "loss": 1.0388,
      "step": 700
    },
    {
      "epoch": 14.81,
      "learning_rate": 1.2079999999999998e-05,
      "loss": 0.9946,
      "step": 800
    },
    {
      "epoch": 16.67,
      "learning_rate": 6.0899999999999984e-06,
      "loss": 0.9791,
      "step": 900
    },
    {
      "epoch": 18.52,
      "learning_rate": 1e-07,
      "loss": 0.9462,
      "step": 1000
    },
    {
      "epoch": 18.52,
      "step": 1000,
      "total_flos": 2.7666045298173542e+17,
      "train_loss": 1.3263815078735353,
      "train_runtime": 482.7688,
      "train_samples_per_second": 33.142,
      "train_steps_per_second": 2.071
    }
  ],
  "max_steps": 1000,
  "num_train_epochs": 19,
  "total_flos": 2.7666045298173542e+17,
  "trial_name": null,
  "trial_params": null
}