finnstrom3693 committed on
Commit
2418aff
·
verified ·
1 Parent(s): 4a8b61c

Upload Minisun trained using model.fit on NeelNanda/pile-10k[0-5000], lr 1e-4, cw 128, 2 epochs, batch size 8, cosine with restarts

Browse files
Files changed (2) hide show
  1. config.json +2 -2
  2. tf_model.h5 +1 -1
config.json CHANGED
@@ -8,7 +8,7 @@
8
  "dropout_rate": 0.1,
9
  "weight_decay": 0.01,
10
  "learning_rate": 0.0001,
11
- "restart_period": 416,
12
- "total_steps": 1250,
13
  "warmup_ratio": 0.2
14
  }
 
8
  "dropout_rate": 0.1,
9
  "weight_decay": 0.01,
10
  "learning_rate": 0.0001,
11
+ "restart_period": 208,
12
+ "total_steps": 625,
13
  "warmup_ratio": 0.2
14
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9065bfc06ad125e999a4b1333a1fc78a53f348fb28290df10d4329d5e83b317c
3
  size 1902620824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eb59b3535f418b15136a046197653a4c4e3f34458d88dc5e5ed4ca387f1b910
3
  size 1902620824