Pathfinder / static /model_shards /pytorch_model.bin.index.json
celise88's picture
add model shards to reduce memory consumption
88a5ae5
raw
history blame
9.74 kB
{
"metadata": {
"total_size": 267820040
},
"weight_map": {
"classifier.bias": "pytorch_model-00006-of-00006.bin",
"classifier.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.embeddings.LayerNorm.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.embeddings.LayerNorm.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.embeddings.position_embeddings.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.embeddings.word_embeddings.weight": "pytorch_model-00002-of-00006.bin",
"distilbert.transformer.layer.0.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.ffn.lin2.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.ffn.lin2.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.output_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.output_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.0.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.1.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.1.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.1.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.1.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.1.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
"distilbert.transformer.layer.2.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.ffn.lin1.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.ffn.lin1.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.2.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.3.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.3.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.3.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.3.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.3.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.3.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.3.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
"distilbert.transformer.layer.4.attention.k_lin.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.k_lin.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.out_lin.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.out_lin.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.v_lin.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.attention.v_lin.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.sa_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.4.sa_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.5.attention.k_lin.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.attention.k_lin.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.attention.out_lin.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.attention.out_lin.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.5.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
"distilbert.transformer.layer.5.attention.v_lin.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.attention.v_lin.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.ffn.lin1.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.ffn.lin1.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.ffn.lin2.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.ffn.lin2.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.output_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.output_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.sa_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
"distilbert.transformer.layer.5.sa_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
"pre_classifier.bias": "pytorch_model-00006-of-00006.bin",
"pre_classifier.weight": "pytorch_model-00006-of-00006.bin"
}
}