Upload folder using huggingface_hub
- checkpoint-10/adapter_config.json +2 -2
- checkpoint-10/adapter_model.safetensors +1 -1
- checkpoint-10/optimizer.pt +1 -1
- checkpoint-10/rng_state_0.pth +2 -2
- checkpoint-10/rng_state_1.pth +2 -2
- checkpoint-10/scheduler.pt +1 -1
- checkpoint-10/trainer_state.json +8 -8
- checkpoint-10/training_args.bin +1 -1
- checkpoint-20/adapter_config.json +2 -2
- checkpoint-20/adapter_model.safetensors +1 -1
- checkpoint-20/optimizer.pt +1 -1
- checkpoint-20/rng_state_0.pth +2 -2
- checkpoint-20/rng_state_1.pth +2 -2
- checkpoint-20/scheduler.pt +1 -1
- checkpoint-20/trainer_state.json +17 -17
- checkpoint-20/training_args.bin +1 -1
- checkpoint-30/adapter_config.json +2 -2
- checkpoint-30/adapter_model.safetensors +1 -1
- checkpoint-30/optimizer.pt +1 -1
- checkpoint-30/rng_state_0.pth +2 -2
- checkpoint-30/rng_state_1.pth +2 -2
- checkpoint-30/scheduler.pt +1 -1
- checkpoint-30/trainer_state.json +22 -22
- checkpoint-30/training_args.bin +1 -1
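
This commit was created with the huggingface_hub client. As a minimal sketch (not the exact command used for this commit), a local training output folder can be pushed to the Hub with the public HfApi.upload_folder API; the folder path and repo id below are placeholders:

from huggingface_hub import HfApi

api = HfApi()  # picks up the token saved by `huggingface-cli login`
api.upload_folder(
    folder_path="./mistral/<run_output_dir>",  # placeholder: local Trainer output dir containing checkpoint-10/20/30
    repo_id="<user-or-org>/<model-repo>",      # placeholder: target model repo on the Hub
    commit_message="Upload folder using huggingface_hub",
)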
checkpoint-10/adapter_config.json
CHANGED
@@ -20,9 +20,9 @@
   "revision": null,
   "target_modules": [
     "q_proj",
+    "k_proj",
     "v_proj",
-    "o_proj",
-    "k_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
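
The only change to adapter_config.json is the list of LoRA target modules: k_proj is now listed together with q_proj, v_proj and o_proj. For orientation, a peft LoraConfig consistent with the new config is sketched below; r, lora_alpha and lora_dropout are illustrative assumptions and do not appear in this diff:

from peft import LoraConfig

lora_config = LoraConfig(
    r=16,               # assumption, not shown in the diff
    lora_alpha=32,      # assumption, not shown in the diff
    lora_dropout=0.05,  # assumption, not shown in the diff
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # as in the updated adapter_config.json
    task_type="CAUSAL_LM",
    use_rslora=False,
)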
checkpoint-10/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4a3e0cdae368d14a843151822c64d75b9e64eabba537d101b909316145d64f25
 size 436242776
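
The binary files (adapter_model.safetensors, optimizer.pt, rng_state_*.pth, scheduler.pt, training_args.bin) are stored via Git LFS, so the diff only updates the pointer file: a sha256 oid plus the object size in bytes. A small sketch for checking a downloaded file against the oid recorded above (the local path is a placeholder):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints do not have to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "4a3e0cdae368d14a843151822c64d75b9e64eabba537d101b909316145d64f25"
print(sha256_of("checkpoint-10/adapter_model.safetensors") == expected)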
checkpoint-10/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:07fdb544e9a22a371a5d8df82da422560206b79b40d84a212a74ea56b50f59fa
 size 872568314
checkpoint-10/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ec5bc6d58edfb094654e0fd7d8830f6528f86d2a2c0b08173da1fe23f3fac2ef
+size 14512
checkpoint-10/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1386ecdf853bc6f64d03fe0f70a1791d948a0933fed4c2a20ca0f9ecc9b5ac63
+size 14512
checkpoint-10/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:822c645454e57cd48b7e246c485396a32d986b666bc34f5e61eafab1e5f8c7e3
 size 1000
checkpoint-10/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.
+  "best_metric": 0.6367942690849304,
   "best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-10",
-  "epoch": 0.
+  "epoch": 0.04956629491945477,
   "eval_steps": 10,
   "global_step": 10,
   "is_hyper_param_search": false,
@@ -9,11 +9,11 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "epoch": 0.05,
+      "eval_loss": 0.6367942690849304,
+      "eval_runtime": 180.8957,
+      "eval_samples_per_second": 15.849,
+      "eval_steps_per_second": 0.995,
       "step": 10
     }
   ],
@@ -22,7 +22,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 10,
-  "total_flos":
+  "total_flos": 4.493525352080998e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
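
The filled-in trainer_state.json values match a Trainer run that evaluates and saves every 10 steps with a train batch size of 16, and the checkpoint path also encodes max_steps-100. A TrainingArguments sketch consistent with that cadence is shown below; output_dir, learning_rate and the scheduler type are assumptions rather than values read from this diff:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./mistral/zeroshot-run",  # placeholder output directory
    per_device_train_batch_size=16,
    max_steps=100,
    evaluation_strategy="steps",
    eval_steps=10,
    save_steps=10,
    load_best_model_at_end=True,  # assumption: best_metric/best_model_checkpoint are being tracked
    learning_rate=2e-4,           # assumption; only the step-20 value (~1.95e-4) appears in the log
    lr_scheduler_type="cosine",   # assumption; the schedule itself is not shown in this diff
)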
checkpoint-10/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:397ed785970a10324cddec1fc7a5b8a987e32e5f4a937dc38def4b1cb481ef1b
 size 5176
checkpoint-20/adapter_config.json
CHANGED
@@ -20,9 +20,9 @@
   "revision": null,
   "target_modules": [
     "q_proj",
+    "k_proj",
     "v_proj",
-    "o_proj",
-    "k_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
checkpoint-20/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:67f93cdadbe7004e290ff17d7efaf31b1ad0c58e951c63623a7ec2db46baa3a0
 size 436242776
checkpoint-20/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a1f3350cd435ac056c183cc29db9441a9f89e9e4bcdebaa06bdd15e0287bb1f1
 size 872568314
checkpoint-20/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d08c0c3fb3de62a4c39c2dd09221b2c5eb71b33e9ddfa0dcea7e1d8b3175714f
+size 14512
checkpoint-20/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3436f6b74a42fc7a55137f98c160bcde850031a076725f6cd3792a86a06ebdbc
+size 14512
checkpoint-20/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9c60be257347464a2f4f18286e268e0f4810c0e6453e5c9541c35f430dab6c52
 size 1000
checkpoint-20/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.
+  "best_metric": 0.48089343309402466,
   "best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-20",
-  "epoch": 0.
+  "epoch": 0.09913258983890955,
   "eval_steps": 10,
   "global_step": 20,
   "is_hyper_param_search": false,
@@ -9,26 +9,26 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "epoch": 0.05,
+      "eval_loss": 0.6367942690849304,
+      "eval_runtime": 180.8957,
+      "eval_samples_per_second": 15.849,
+      "eval_steps_per_second": 0.995,
       "step": 10
     },
     {
-      "epoch": 0.
-      "grad_norm": 0.
-      "learning_rate": 0.
-      "loss":
+      "epoch": 0.1,
+      "grad_norm": 0.34346863627433777,
+      "learning_rate": 0.00019510565162951537,
+      "loss": 0.9774,
       "step": 20
     },
     {
-      "epoch": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "epoch": 0.1,
+      "eval_loss": 0.48089343309402466,
+      "eval_runtime": 180.8882,
+      "eval_samples_per_second": 15.85,
+      "eval_steps_per_second": 0.995,
       "step": 20
     }
   ],
@@ -37,7 +37,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 10,
-  "total_flos":
+  "total_flos": 9.010061420947046e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
checkpoint-20/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:397ed785970a10324cddec1fc7a5b8a987e32e5f4a937dc38def4b1cb481ef1b
 size 5176
checkpoint-30/adapter_config.json
CHANGED
@@ -20,9 +20,9 @@
   "revision": null,
   "target_modules": [
     "q_proj",
+    "k_proj",
     "v_proj",
-    "o_proj",
-    "k_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
checkpoint-30/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:24867322dfb4671521880b7952dfecf869c9ddf50ba8c8d0ab60442d46c3b6e7
 size 436242776
checkpoint-30/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6a095acb983c64222b3980c352b38e58ba4bd6e2f0b8e559eddea78a65277790
 size 872568314
checkpoint-30/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2c2a616f0c7a81cc42f0138e0036f3bc925663524713430ec02128feb7cc22f7
+size 14512
checkpoint-30/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3547aee58cf2280e2d966f891fba83ef181a792754cf251e62b59ae644dd2253
+size 14512
checkpoint-30/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:35079c4c5c54e3e2a4600c729d1fa32b43362d22b0baf443894af8a786492df1
 size 1000
checkpoint-30/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.
+  "best_metric": 0.44581571221351624,
   "best_model_checkpoint": "./mistral/01-03-24-Weni-ZeroShot-3.3.18-Mistral-7b-Multilanguage-3.2.0_Zeroshot-2_max_steps-100_batch_16_2024-03-01_ppid_7/checkpoint-30",
-  "epoch": 0.
+  "epoch": 0.14869888475836432,
   "eval_steps": 10,
   "global_step": 30,
   "is_hyper_param_search": false,
@@ -9,34 +9,34 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "epoch": 0.05,
+      "eval_loss": 0.6367942690849304,
+      "eval_runtime": 180.8957,
+      "eval_samples_per_second": 15.849,
+      "eval_steps_per_second": 0.995,
       "step": 10
     },
     {
-      "epoch": 0.
-      "grad_norm": 0.
-      "learning_rate": 0.
-      "loss":
+      "epoch": 0.1,
+      "grad_norm": 0.34346863627433777,
+      "learning_rate": 0.00019510565162951537,
+      "loss": 0.9774,
       "step": 20
     },
     {
-      "epoch": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "epoch": 0.1,
+      "eval_loss": 0.48089343309402466,
+      "eval_runtime": 180.8882,
+      "eval_samples_per_second": 15.85,
+      "eval_steps_per_second": 0.995,
       "step": 20
     },
     {
-      "epoch": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 0.
+      "epoch": 0.15,
+      "eval_loss": 0.44581571221351624,
+      "eval_runtime": 180.8305,
+      "eval_samples_per_second": 15.855,
+      "eval_steps_per_second": 0.995,
       "step": 30
     }
   ],
@@ -45,7 +45,7 @@
  "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 10,
-  "total_flos":
+  "total_flos": 1.3497349192024064e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
checkpoint-30/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:397ed785970a10324cddec1fc7a5b8a987e32e5f4a937dc38def4b1cb481ef1b
 size 5176