Commit 0351fb4 by steveice: Training in progress, epoch 0

config.json:
{
  "_name_or_path": "MCG-NJU/videomae-large-finetuned-kinetics",
  "architectures": [
    "VideoMAEForVideoClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "decoder_hidden_size": 512,
  "decoder_intermediate_size": 2048,
  "decoder_num_attention_heads": 8,
  "decoder_num_hidden_layers": 12,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "id2label": {
    "0": "C_1",
    "1": "C_2",
    "2": "C_3",
    "3": "C_4",
    "4": "C_5",
    "5": "C_6",
    "6": "C_7",
    "7": "C_8",
    "8": "O_1",
    "9": "O_2",
    "10": "O_3",
    "11": "O_4",
    "12": "O_5",
    "13": "O_6",
    "14": "O_7",
    "15": "P_1",
    "16": "P_10",
    "17": "P_11",
    "18": "P_12",
    "19": "P_2",
    "20": "P_3",
    "21": "P_4",
    "22": "P_5",
    "23": "P_6",
    "24": "P_7",
    "25": "P_8",
    "26": "P_9",
    "27": "Q_1",
    "28": "Q_2",
    "29": "Q_3",
    "30": "Q_4",
    "31": "Q_5",
    "32": "Q_6",
    "33": "Q_7",
    "34": "T_1",
    "35": "T_2",
    "36": "T_3",
    "37": "T_4",
    "38": "T_5",
    "39": "T_6",
    "40": "T_7"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "C_1": 0,
    "C_2": 1,
    "C_3": 2,
    "C_4": 3,
    "C_5": 4,
    "C_6": 5,
    "C_7": 6,
    "C_8": 7,
    "O_1": 8,
    "O_2": 9,
    "O_3": 10,
    "O_4": 11,
    "O_5": 12,
    "O_6": 13,
    "O_7": 14,
    "P_1": 15,
    "P_10": 16,
    "P_11": 17,
    "P_12": 18,
    "P_2": 19,
    "P_3": 20,
    "P_4": 21,
    "P_5": 22,
    "P_6": 23,
    "P_7": 24,
    "P_8": 25,
    "P_9": 26,
    "Q_1": 27,
    "Q_2": 28,
    "Q_3": 29,
    "Q_4": 30,
    "Q_5": 31,
    "Q_6": 32,
    "Q_7": 33,
    "T_1": 34,
    "T_2": 35,
    "T_3": 36,
    "T_4": 37,
    "T_5": 38,
    "T_6": 39,
    "T_7": 40
  },
  "layer_norm_eps": 1e-12,
  "model_type": "videomae",
  "norm_pix_loss": true,
  "num_attention_heads": 16,
  "num_channels": 3,
  "num_frames": 16,
  "num_hidden_layers": 24,
  "patch_size": 16,
  "problem_type": "single_label_classification",
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.33.2",
  "tubelet_size": 2,
  "use_mean_pooling": true
}
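
This configuration describes a VideoMAE-Large video classifier with 41 labels, taking 16 frames of 224x224 RGB input with a tubelet size of 2. Below is a minimal sketch of loading it with the transformers library; it assumes the JSON above is saved locally as ./config.json and pulls encoder weights from the base checkpoint named in _name_or_path, so the 41-way classification head is freshly initialized rather than loaded from this repository's fine-tuned weights.

    # Minimal sketch: instantiate a VideoMAE classifier from the config above.
    # Assumption: the JSON is saved locally as ./config.json; weights come from
    # the base checkpoint "MCG-NJU/videomae-large-finetuned-kinetics".
    import torch
    from transformers import VideoMAEConfig, VideoMAEForVideoClassification

    config = VideoMAEConfig.from_json_file("./config.json")
    model = VideoMAEForVideoClassification.from_pretrained(
        "MCG-NJU/videomae-large-finetuned-kinetics",
        config=config,
        # The Kinetics checkpoint has 400 output labels, not 41, so the
        # classification head is re-initialized to match this config.
        ignore_mismatched_sizes=True,
    )
    model.eval()

    # Dummy clip shaped (batch, num_frames, channels, height, width).
    pixel_values = torch.randn(
        1, config.num_frames, config.num_channels,
        config.image_size, config.image_size,
    )
    with torch.no_grad():
        logits = model(pixel_values=pixel_values).logits
    print(config.id2label[logits.argmax(-1).item()])  # e.g. "P_3"

The id2label / label2id maps in the config make the predicted class index directly interpretable as one of the C_*, O_*, P_*, Q_*, T_* labels without any extra lookup table.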