stealth-edits/hparams/SE/gpt-j-6b.json
{
"rewrite_module_tmp": "transformer.h.{}.mlp.fc_in",
"layer_module_tmp": "transformer.h.{}",
"mlp_module_tmp": "transformer.h.{}.mlp",
"proj_module_tmp": "transformer.h.{}.mlp.fc_out",
"embedding_layer": "transformer.wte",
"v_loss_layer": 27,
"norm_learnables": {
"norm_weight": "transformer.h.{}.ln_1.weight",
"norm_bias": "transformer.h.{}.ln_1.bias"
},
"weights_to_modify": {
"w1_weight": "transformer.h.{}.mlp.fc_in.weight",
"w1_bias": "transformer.h.{}.mlp.fc_in.bias",
"w2_weight": "transformer.h.{}.mlp.fc_out.weight",
"w2_bias": "transformer.h.{}.mlp.fc_out.bias"
},
"activation": "gelu",
"n_embd": 4096,
"mlp_type": "type1",
"model_name": "gpt-j-6b"
}
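
For orientation, the sketch below shows one way a file like this might be consumed: each `*_module_tmp` template has its `{}` placeholder filled with a layer index and is then resolved against a Hugging Face GPT-J-6B checkpoint. The loading code and shape checks are illustrative assumptions, not the stealth-edits implementation itself; only the hparams keys come from the JSON above.

```python
import json

import torch
from transformers import AutoModelForCausalLM

# Path assumed relative to the repository root.
with open("hparams/SE/gpt-j-6b.json") as f:
    hparams = json.load(f)

model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-j-6b", torch_dtype=torch.float16
)

layer = 5  # example edit layer; GPT-J-6B has layers 0..27

# Fill the templates and resolve them to live modules.
rewrite = model.get_submodule(hparams["rewrite_module_tmp"].format(layer))
proj = model.get_submodule(hparams["proj_module_tmp"].format(layer))

# Sanity checks: fc_in consumes n_embd features, fc_out produces them,
# matching "n_embd": 4096 in the JSON (nn.Linear weights are [out, in]).
assert rewrite.weight.shape[1] == hparams["n_embd"]
assert proj.weight.shape[0] == hparams["n_embd"]
```

The same template scheme is what lets one config cover every layer: `weights_to_modify` and `norm_learnables` name the edited parameters per layer, while `v_loss_layer` (27, the final block) and `embedding_layer` are fixed anchor points in the model.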