Dmitry Chaplinsky
committed on
Commit
·
b28ee5f
1
Parent(s):
e66a7fd
More iterations
Browse files- best-lm.pt +1 -1
- loss.txt +34 -0
best-lm.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 22791455
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c89c32c97671b2dd8128ad456b72f15a85ab049028aab1cb3f0165bfece949e
|
3 |
size 22791455
|
loss.txt
CHANGED
@@ -339,3 +339,37 @@
|
|
339 |
| end of split 31 / 28 | epoch 12 | time: 3780.51s | valid loss 1.0500 | valid ppl 2.8576 | learning rate 5.0000
|
340 |
| end of split 32 / 28 | epoch 12 | time: 3779.10s | valid loss 1.0500 | valid ppl 2.8576 | learning rate 5.0000
|
341 |
| end of split 33 / 28 | epoch 12 | time: 1096.89s | valid loss 1.0500 | valid ppl 2.8576 | learning rate 5.0000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
| end of split 31 / 28 | epoch 12 | time: 3780.51s | valid loss 1.0500 | valid ppl 2.8576 | learning rate 5.0000
|
340 |
| end of split 32 / 28 | epoch 12 | time: 3779.10s | valid loss 1.0500 | valid ppl 2.8576 | learning rate 5.0000
|
341 |
| end of split 33 / 28 | epoch 12 | time: 1096.89s | valid loss 1.0500 | valid ppl 2.8576 | learning rate 5.0000
|
342 |
+
| end of split 34 / 28 | epoch 12 | time: 3777.57s | valid loss 1.0499 | valid ppl 2.8574 | learning rate 5.0000
|
343 |
+
| end of split 35 / 28 | epoch 12 | time: 3779.50s | valid loss 1.0501 | valid ppl 2.8581 | learning rate 5.0000
|
344 |
+
| end of split 36 / 28 | epoch 12 | time: 3782.16s | valid loss 1.0499 | valid ppl 2.8573 | learning rate 5.0000
|
345 |
+
| end of split 37 / 28 | epoch 12 | time: 3777.44s | valid loss 1.0498 | valid ppl 2.8572 | learning rate 5.0000
|
346 |
+
| end of split 38 / 28 | epoch 12 | time: 3777.04s | valid loss 1.0499 | valid ppl 2.8573 | learning rate 5.0000
|
347 |
+
| end of split 39 / 28 | epoch 12 | time: 3774.81s | valid loss 1.0501 | valid ppl 2.8580 | learning rate 5.0000
|
348 |
+
| end of split 40 / 28 | epoch 12 | time: 3775.55s | valid loss 1.0498 | valid ppl 2.8570 | learning rate 5.0000
|
349 |
+
| end of split 41 / 28 | epoch 12 | time: 3780.06s | valid loss 1.0498 | valid ppl 2.8569 | learning rate 5.0000
|
350 |
+
| end of split 42 / 28 | epoch 12 | time: 3781.04s | valid loss 1.0497 | valid ppl 2.8567 | learning rate 5.0000
|
351 |
+
| end of split 43 / 28 | epoch 12 | time: 3778.87s | valid loss 1.0496 | valid ppl 2.8565 | learning rate 5.0000
|
352 |
+
| end of split 44 / 28 | epoch 12 | time: 3778.19s | valid loss 1.0496 | valid ppl 2.8566 | learning rate 5.0000
|
353 |
+
| end of split 45 / 28 | epoch 12 | time: 3780.17s | valid loss 1.0496 | valid ppl 2.8565 | learning rate 5.0000
|
354 |
+
| end of split 46 / 28 | epoch 12 | time: 3778.47s | valid loss 1.0496 | valid ppl 2.8564 | learning rate 5.0000
|
355 |
+
| end of split 47 / 28 | epoch 12 | time: 3780.63s | valid loss 1.0495 | valid ppl 2.8563 | learning rate 5.0000
|
356 |
+
| end of split 48 / 28 | epoch 12 | time: 3783.64s | valid loss 1.0495 | valid ppl 2.8563 | learning rate 5.0000
|
357 |
+
| end of split 49 / 28 | epoch 12 | time: 3783.57s | valid loss 1.0496 | valid ppl 2.8566 | learning rate 5.0000
|
358 |
+
| end of split 50 / 28 | epoch 12 | time: 3781.86s | valid loss 1.0495 | valid ppl 2.8562 | learning rate 5.0000
|
359 |
+
| end of split 51 / 28 | epoch 12 | time: 3785.70s | valid loss 1.0494 | valid ppl 2.8559 | learning rate 5.0000
|
360 |
+
| end of split 52 / 28 | epoch 12 | time: 3789.97s | valid loss 1.0494 | valid ppl 2.8559 | learning rate 5.0000
|
361 |
+
| end of split 53 / 28 | epoch 12 | time: 3790.93s | valid loss 1.0494 | valid ppl 2.8559 | learning rate 5.0000
|
362 |
+
| end of split 54 / 28 | epoch 12 | time: 3809.97s | valid loss 1.0493 | valid ppl 2.8558 | learning rate 5.0000
|
363 |
+
| end of split 55 / 28 | epoch 12 | time: 3815.38s | valid loss 1.0494 | valid ppl 2.8559 | learning rate 5.0000
|
364 |
+
| end of split 56 / 28 | epoch 12 | time: 3823.98s | valid loss 1.0492 | valid ppl 2.8554 | learning rate 5.0000
|
365 |
+
| end of split 57 / 28 | epoch 12 | time: 3821.56s | valid loss 1.0493 | valid ppl 2.8556 | learning rate 5.0000
|
366 |
+
| end of split 30 / 28 | epoch 13 | time: 3781.69s | valid loss 1.0493 | valid ppl 2.8558 | learning rate 5.0000
|
367 |
+
| end of split 31 / 28 | epoch 13 | time: 3822.49s | valid loss 1.0492 | valid ppl 2.8552 | learning rate 5.0000
|
368 |
+
| end of split 32 / 28 | epoch 13 | time: 3826.45s | valid loss 1.0491 | valid ppl 2.8552 | learning rate 5.0000
|
369 |
+
| end of split 33 / 28 | epoch 13 | time: 3825.81s | valid loss 1.0491 | valid ppl 2.8550 | learning rate 5.0000
|
370 |
+
| end of split 34 / 28 | epoch 13 | time: 3825.85s | valid loss 1.0490 | valid ppl 2.8549 | learning rate 5.0000
|
371 |
+
| end of split 35 / 28 | epoch 13 | time: 3805.21s | valid loss 1.0491 | valid ppl 2.8551 | learning rate 5.0000
|
372 |
+
| end of split 36 / 28 | epoch 13 | time: 3833.10s | valid loss 1.0490 | valid ppl 2.8547 | learning rate 5.0000
|
373 |
+
| end of split 37 / 28 | epoch 13 | time: 3790.99s | valid loss 1.0489 | valid ppl 2.8545 | learning rate 5.0000
|
374 |
+
| end of split 38 / 28 | epoch 13 | time: 3794.98s | valid loss 1.0490 | valid ppl 2.8547 | learning rate 5.0000
|
375 |
+
| end of split 39 / 28 | epoch 13 | time: 3794.00s | valid loss 1.0490 | valid ppl 2.8547 | learning rate 5.0000
|