# File size: 9,424 Bytes
# 99df524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
# Augmentation options. ENABLE is false below, so the other keys in this
# section are presumably inert for this run — TODO confirm with the consumer.
AUG:
  AA_TYPE: rand-m9-mstd0.5-inc1
  COLOR_JITTER: 0.4
  ENABLE: false
  GEN_MASK_LOADER: false
  INTERPOLATION: bicubic
  MASK_FRAMES: false
  MASK_RATIO: 0.0
  MASK_TUBE: false
  # Three integers; the axis order is not stated here (presumably T, H, W —
  # verify against the consumer).
  MASK_WINDOW_SIZE:
  - 8
  - 7
  - 7
  MAX_MASK_PATCHES_PER_BLOCK: null
  NUM_SAMPLE: 1
  # RE_* keys: presumably random-erasing settings — TODO confirm.
  RE_COUNT: 1
  RE_MODE: pixel
  RE_PROB: 0.25
  RE_SPLIT: false
# AVA dataset options.
# NOTE(review): the directory values here are absolute /mnt/... paths, unlike
# the relative kabr/KABR/... paths used elsewhere in this file, and
# DETECTION.ENABLE is false — so this section is presumably unused for this
# run. Confirm before relying on any of these values.
AVA:
  # Same value as FRAME_LIST_DIR below.
  ANNOTATION_DIR: /mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/
  BGR: false
  DETECTION_SCORE_THRESH: 0.9
  EXCLUSION_FILE: ava_val_excluded_timestamps_v2.2.csv
  FRAME_DIR: /mnt/fair-flash3-east/ava_trainval_frames.img/
  FRAME_LIST_DIR: /mnt/vol/gfsai-flash3-east/ai-group/users/haoqifan/ava/frame_list/
  FULL_TEST_ON_VAL: false
  GROUNDTRUTH_FILE: ava_val_v2.2.csv
  IMG_PROC_BACKEND: cv2
  LABEL_MAP_FILE: ava_action_list_v2.2_for_activitynet_2019.pbtxt
  TEST_FORCE_FLIP: false
  # The *_LISTS keys hold bare file names; the directory they are resolved
  # against is not shown in this file.
  TEST_LISTS:
  - val.csv
  TEST_PREDICT_BOX_LISTS:
  - ava_val_predicted_boxes.csv
  TRAIN_GT_BOX_LISTS:
  - ava_train_v2.2.csv
  TRAIN_LISTS:
  - train.csv
  TRAIN_PCA_JITTER_ONLY: true
  TRAIN_PREDICT_BOX_LISTS: []
  TRAIN_USE_COLOR_AUGMENTATION: false
# Benchmark options: a log period, an epoch count and a shuffle flag.
# Units of LOG_PERIOD are not stated here (presumably iterations — verify).
BENCHMARK:
  LOG_PERIOD: 100
  NUM_EPOCHS: 5
  SHUFFLE: true
# Batch-normalization options.
# NOTE(review): NORM_TYPE is sync_batchnorm while NUM_SYNC_DEVICES is 1,
# GLOBAL_SYNC is false, and the top-level NUM_GPUS is 8 — confirm that the
# resulting sync group size is what was intended.
BN:
  GLOBAL_SYNC: false
  NORM_TYPE: sync_batchnorm
  NUM_BATCHES_PRECISE: 200
  NUM_SPLITS: 1
  NUM_SYNC_DEVICES: 1
  USE_PRECISE_STATS: true
  # Distinct from SOLVER.WEIGHT_DECAY (5.0e-05); presumably applies only to
  # normalization parameters — TODO confirm.
  WEIGHT_DECAY: 0.0
# Contrastive-learning options. TASK is '' in this file; whether any of these
# keys are read for this run cannot be told from here.
CONTRASTIVE:
  BN_MLP: false
  BN_SYNC_MLP: false
  # .inf / -.inf are YAML float infinities, i.e. no finite bound on either side.
  DELTA_CLIPS_MAX: .inf
  DELTA_CLIPS_MIN: -.inf
  DIM: 128
  INTERP_MEMORY: false
  KNN_ON: true
  LENGTH: 239975
  LOCAL_SHUFFLE_BN: true
  MEM_TYPE: 1d
  MLP_DIM: 2048
  MOCO_MULTI_VIEW_QUEUE: false
  MOMENTUM: 0.5
  MOMENTUM_ANNEALING: false
  NUM_CLASSES_DOWNSTREAM: 400
  NUM_MLP_LAYERS: 1
  PREDICTOR_DEPTHS: []
  QUEUE_LEN: 65536
  SEQUENTIAL: false
  SIMCLR_DIST_ON: true
  # NOTE(review): the key is spelled "QEUE". It is a lookup name, so keep it
  # as-is unless the consumer's schema is changed at the same time.
  SWAV_QEUE_LEN: 0
  T: 0.07
  TYPE: mem
# Input-data options: decoding, frame sampling, normalization, and crop/jitter
# settings, plus the dataset paths used by this run.
DATA:
  COLOR_RND_GRAYSCALE: 0.0
  DECODING_BACKEND: torchvision
  DECODING_SHORT_SIZE: 256
  DUMMY_LOAD: false
  ENSEMBLE_METHOD: max
  IN22K_TRAINVAL: false
  # NOTE(review): mixed-case key ("IN22k"); it is a lookup name — do not
  # normalize the case without checking the consumer's schema.
  IN22k_VAL_IN1K: ''
  # Single-element list.
  INPUT_CHANNEL_NUM:
  - 3
  INV_UNIFORM_SAMPLE: true
  IN_VAL_CROP_RATIO: 0.875
  LOADER_CHUNK_OVERALL_SIZE: 0
  LOADER_CHUNK_SIZE: 0
  # Three identical values; presumably one per colour channel — TODO confirm.
  MEAN:
  - 0.45
  - 0.45
  - 0.45
  # true here, alongside MODEL.HEAD_ACT: sigmoid elsewhere in this file.
  MULTI_LABEL: true
  NUM_FRAMES: 16
  # The value is a single space character.
  PATH_LABEL_SEPARATOR: ' '
  # Relative paths; the base directory they are resolved against is not shown
  # in this file.
  PATH_PREFIX: kabr/KABR/dataset/image
  PATH_TO_DATA_DIR: kabr/KABR/annotation
  PATH_TO_PRELOAD_IMDB: ''
  RANDOM_FLIP: true
  REVERSE_INPUT_CHANNEL: true
  SAMPLING_RATE: 5
  SKIP_ROWS: 0
  # SSL_* keys: two-element lists for the blur sigmas, three-element list for
  # BRI_CON_SAT. Presumably self-supervised augmentation settings — verify.
  SSL_BLUR_SIGMA_MAX:
  - 0.0
  - 2.0
  SSL_BLUR_SIGMA_MIN:
  - 0.0
  - 0.1
  SSL_COLOR_BRI_CON_SAT:
  - 0.2
  - 0.2
  - 0.2
  SSL_COLOR_HUE: 0.1
  SSL_COLOR_JITTER: true
  SSL_MOCOV2_AUG: false
  # Three identical values; presumably one per colour channel — TODO confirm.
  STD:
  - 0.225
  - 0.225
  - 0.225
  TARGET_FPS: 30
  # Same value as TRAIN_CROP_SIZE below.
  TEST_CROP_SIZE: 300
  TIME_DIFF_PROB: 0.0
  TRAIN_CROP_NUM_SPATIAL: 1
  TRAIN_CROP_NUM_TEMPORAL: 1
  TRAIN_CROP_SIZE: 300
  TRAIN_JITTER_ASPECT_RELATIVE: []
  TRAIN_JITTER_FPS: 0.0
  TRAIN_JITTER_MOTION_SHIFT: false
  # Two-element list (presumably [min, max] — verify); the first value equals
  # TRAIN_CROP_SIZE.
  TRAIN_JITTER_SCALES:
  - 300
  - 400
  TRAIN_JITTER_SCALES_RELATIVE: []
  TRAIN_PCA_EIGVAL:
  - 0.225
  - 0.224
  - 0.229
  # 3x3 nested list: three rows of three floats.
  TRAIN_PCA_EIGVEC:
  - - -0.5675
    - 0.7192
    - 0.4009
  - - -0.5808
    - -0.0045
    - -0.814
  - - -0.5836
    - -0.6948
    - 0.4203
  USE_OFFSET_SAMPLING: false
# Data-loader options: worker count, memory pinning, and a multi-thread decode
# flag (disabled here).
DATA_LOADER:
  ENABLE_MULTI_THREAD_DECODE: false
  NUM_WORKERS: 8
  PIN_MEMORY: true
# Demo options. ENABLE is false below, so this section is presumably inactive
# for this run — TODO confirm.
DEMO:
  BUFFER_SIZE: 0
  CLIP_VIS_SIZE: 10
  # NOTE(review): these entries are human-action labels, while LABEL_FILE_PATH
  # below points at kabr/KABR/annotation/classes.json. They look like values
  # left over from a different dataset — verify whether they matter here.
  COMMON_CLASS_NAMES:
  - watch (a person)
  - talk to (e.g., self, a person, a group)
  - listen to (a person)
  - touch (an object)
  - carry/hold (an object)
  - walk
  - sit
  - lie/sleep
  - bend/bow (at the waist)
  COMMON_CLASS_THRES: 0.7
  DETECTRON2_CFG: COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
  DETECTRON2_THRESH: 0.9
  DETECTRON2_WEIGHTS: detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl
  DISPLAY_HEIGHT: 0
  DISPLAY_WIDTH: 0
  ENABLE: false
  FPS: 30
  GT_BOXES: ''
  INPUT_FORMAT: BGR
  # Input and output share the file name G0103.mp4 under different directories
  # (dataset/video vs dataset/predict).
  INPUT_VIDEO: kabr/KABR/dataset/video/G0103.mp4
  LABEL_FILE_PATH: kabr/KABR/annotation/classes.json
  NUM_CLIPS_SKIP: 1
  NUM_VIS_INSTANCES: 1
  OUTPUT_FILE: kabr/KABR/dataset/predict/G0103.mp4
  # -1 here and for WEBCAM below; presumably a "not set / disabled" sentinel —
  # TODO confirm.
  OUTPUT_FPS: -1
  PREDS_BOXES: ''
  SLOWMO: 1
  STARTING_SECOND: 900
  THREAD_ENABLE: false
  UNCOMMON_CLASS_THRES: 0.3
  VIS_MODE: thres
  WEBCAM: -1
# Detection options. ENABLE is false below, so the other keys here are
# presumably inactive for this run — TODO confirm.
DETECTION:
  ALIGNED: true
  ENABLE: false
  ROI_XFORM_RESOLUTION: 7
  SPATIAL_SCALE_FACTOR: 16
# Top-level scalars: distributed backend name and logging switches.
DIST_BACKEND: nccl
LOG_MODEL_INFO: true
# Units are not stated here (presumably iterations — verify). A separate
# BENCHMARK.LOG_PERIOD (100) also exists in this file.
LOG_PERIOD: 10
# Masked-pretraining options. ENABLE and MAE_ON are both false below, so this
# section is presumably inactive for this run — TODO confirm.
MASK:
  DECODER_DEPTH: 0
  DECODER_EMBED_DIM: 512
  DECODER_SEP_POS_EMBED: false
  DEC_KV_KERNEL: []
  DEC_KV_STRIDE: []
  ENABLE: false
  HEAD_TYPE: separate
  MAE_ON: false
  MAE_RND_MASK: false
  NORM_PRED_PIXEL: true
  PER_FRAME_MASKING: false
  PRED_HOG: false
  # Single-element list.
  PRETRAIN_DEPTH:
  - 15
  SCALE_INIT_BY_DEPTH: false
  TIME_STRIDE_LOSS: true
# Mixup/CutMix options. ENABLE is false below, so the other keys here are
# presumably inactive for this run — TODO confirm.
MIXUP:
  ALPHA: 0.8
  CUTMIX_ALPHA: 1.0
  ENABLE: false
  LABEL_SMOOTH_VALUE: 0.1
  PROB: 1.0
  SWITCH_PROB: 0.5
# Model selection and head options. ARCH is x3d, MODEL_NAME is X3D, and there
# are 8 output classes.
MODEL:
  ACT_CHECKPOINT: false
  ARCH: x3d
  DETACH_FINAL_FC: false
  DROPCONNECT_RATE: 0.0
  DROPOUT_RATE: 0.5
  FC_INIT_STD: 0.01
  FP16_ALLREDUCE: false
  FROZEN_BN: false
  # sigmoid here, alongside DATA.MULTI_LABEL: true elsewhere in this file.
  HEAD_ACT: sigmoid
  # NOTE(review): "EQL" must be a loss name the consumer recognises; its
  # definition is not visible from this file.
  LOSS_FUNC: EQL
  MODEL_NAME: X3D
  # Architecture-name lists; ARCH above (x3d) appears in SINGLE_PATHWAY_ARCH.
  MULTI_PATHWAY_ARCH:
  - slowfast
  NUM_CLASSES: 8
  SINGLE_PATHWAY_ARCH:
  - 2d
  - c2d
  - i3d
  - slow
  - x3d
  - mvit
  - maskmvit
# Multigrid training options. LONG_CYCLE and SHORT_CYCLE are both false below,
# so the schedule factors are presumably unused for this run — TODO confirm.
MULTIGRID:
  BN_BASE_SIZE: 8
  DEFAULT_B: 0
  DEFAULT_S: 0
  DEFAULT_T: 0
  EPOCH_FACTOR: 1.5
  EVAL_FREQ: 3
  LONG_CYCLE: false
  # Four [a, b] pairs. 0.7071067811865476 is 1/sqrt(2). What each position of a
  # pair scales is not stated in this file.
  LONG_CYCLE_FACTORS:
  - - 0.25
    - 0.7071067811865476
  - - 0.5
    - 0.7071067811865476
  - - 0.5
    - 1
  - - 1
    - 1
  LONG_CYCLE_SAMPLING_RATE: 0
  SHORT_CYCLE: false
  # Two factors: 0.5 and 1/sqrt(2).
  SHORT_CYCLE_FACTORS:
  - 0.5
  - 0.7071067811865476
# MViT options. MODEL.ARCH is x3d in this file, so this section is presumably
# unused for this run — TODO confirm.
MVIT:
  CLS_EMBED_ON: true
  DEPTH: 16
  DIM_MUL: []
  DIM_MUL_IN_ATT: false
  DROPOUT_RATE: 0.0
  DROPPATH_RATE: 0.1
  EMBED_DIM: 96
  HEAD_INIT_SCALE: 1.0
  HEAD_MUL: []
  LAYER_SCALE_INIT_VALUE: 0.0
  MLP_RATIO: 4.0
  MODE: conv
  NORM: layernorm
  NORM_STEM: false
  NUM_HEADS: 1
  PATCH_2D: false
  # PATCH_* keys are three-element lists; the axis order is not stated here
  # (presumably T, H, W — verify).
  PATCH_KERNEL:
  - 3
  - 7
  - 7
  PATCH_PADDING:
  - 2
  - 4
  - 4
  PATCH_STRIDE:
  - 2
  - 4
  - 4
  POOL_FIRST: false
  POOL_KVQ_KERNEL: null
  POOL_KV_STRIDE: []
  POOL_KV_STRIDE_ADAPTIVE: null
  POOL_Q_STRIDE: []
  QKV_BIAS: true
  REL_POS_SPATIAL: false
  REL_POS_TEMPORAL: false
  REL_POS_ZERO_INIT: false
  RESIDUAL_POOLING: false
  # Nested sub-section; its own ENABLE flag is false.
  REV:
    BUFFER_LAYERS: []
    ENABLE: false
    PRE_Q_FUSION: avg
    RESPATH_FUSE: concat
    RES_PATH: conv
  SEPARATE_QKV: false
  SEP_POS_EMBED: false
  USE_ABS_POS: true
  USE_FIXED_SINCOS_POS: false
  USE_MEAN_POOLING: false
  ZERO_DECAY_POS_CLS: true
# Non-local block options. Each list below has four outer entries (presumably
# one per network stage — TODO confirm).
NONLOCAL:
  GROUP:
  - - 1
  - - 1
  - - 1
  - - 1
  INSTANTIATION: dot_product
  # Every inner list is empty, i.e. no locations are listed in this file.
  LOCATION:
  - - []
  - - []
  - - []
  - - []
  # Four outer entries, each holding two [1, 2, 2] triples.
  POOL:
  - - - 1
      - 2
      - 2
    - - 1
      - 2
      - 2
  - - - 1
      - 2
      - 2
    - - 1
      - 2
      - 2
  - - - 1
      - 2
      - 2
    - - 1
      - 2
      - 2
  - - - 1
      - 2
      - 2
    - - 1
      - 2
      - 2
# Top-level run layout: GPU count, shard count, and the output directory.
NUM_GPUS: 8
NUM_SHARDS: 1
# Relative path; TEST.SAVE_RESULTS_PATH in this file points inside it.
OUTPUT_DIR: kabr/KABR/logs/x3d-l-kabr
# ResNet-style backbone options. TRANS_FUNC is x3d_transform, matching
# MODEL.ARCH: x3d in this file. Each per-stage list has four outer entries.
RESNET:
  DEPTH: 50
  INPLACE_RELU: true
  NUM_BLOCK_TEMP_KERNEL:
  - - 3
  - - 4
  - - 6
  - - 3
  NUM_GROUPS: 1
  SPATIAL_DILATIONS:
  - - 1
  - - 1
  - - 1
  - - 1
  SPATIAL_STRIDES:
  - - 1
  - - 2
  - - 2
  - - 2
  STRIDE_1X1: false
  TRANS_FUNC: x3d_transform
  WIDTH_PER_GROUP: 64
  ZERO_INIT_FINAL_BN: true
  ZERO_INIT_FINAL_CONV: false
# Top-level scalars: random seed and this process's shard index
# (NUM_SHARDS is 1 in this file, so 0 is the only valid index).
RNG_SEED: 0
SHARD_ID: 0
# SlowFast options. MODEL.ARCH is x3d in this file, so this section is
# presumably unused for this run — TODO confirm.
SLOWFAST:
  ALPHA: 8
  BETA_INV: 8
  FUSION_CONV_CHANNEL_RATIO: 2
  FUSION_KERNEL_SZ: 5
# Optimizer and learning-rate schedule: SGD with Nesterov momentum 0.9, a
# cosine LR policy, base LR 0.05, over 120 epochs.
SOLVER:
  BASE_LR: 0.05
  BASE_LR_SCALE_NUM_SHARDS: true
  # Two-element list; presumably only read by Adam-style optimizers, while
  # OPTIMIZING_METHOD here is sgd — TODO confirm.
  BETAS:
  - 0.9
  - 0.999
  # null for both clip settings.
  CLIP_GRAD_L2NORM: null
  CLIP_GRAD_VAL: null
  COSINE_AFTER_WARMUP: false
  COSINE_END_LR: 0.0
  DAMPENING: 0.0
  GAMMA: 0.1
  LARS_ON: false
  LAYER_DECAY: 1.0
  LRS: []
  LR_POLICY: cosine
  MAX_EPOCH: 120
  MOMENTUM: 0.9
  NESTEROV: true
  OPTIMIZING_METHOD: sgd
  STEPS: []
  STEP_SIZE: 1
  # NOTE(review): warmup spans 35 of the 120 epochs, starting at
  # WARMUP_START_LR 0.01 (BASE_LR is 0.05) — confirm this length is intended.
  WARMUP_EPOCHS: 35.0
  WARMUP_FACTOR: 0.1
  WARMUP_START_LR: 0.01
  # Written in exponent form with a decimal point, which YAML loaders read as a
  # float.
  WEIGHT_DECAY: 5.0e-05
  ZERO_WD_1D_PARAM: false
# Empty string; what an empty TASK selects is decided by the consumer and is
# not visible from this file.
TASK: ''
# TensorBoard logging/visualization options. ENABLE is true at this level, and
# the confusion-matrix, histogram and model-visualization sub-sections are all
# enabled; WRONG_PRED_VIS is disabled.
TENSORBOARD:
  CATEGORIES_PATH: ''
  CLASS_NAMES_PATH: kabr/KABR/annotation/classes.json
  CONFUSION_MATRIX:
    ENABLE: true
    FIGSIZE:
    - 8
    - 8
    # NOTE(review): a .txt file, whereas CLASS_NAMES_PATH above is .json —
    # confirm both files exist and use the formats the consumer expects.
    SUBSET_PATH: kabr/KABR/annotation/classes.txt
  ENABLE: true
  HISTOGRAM:
    ENABLE: true
    FIGSIZE:
    - 8
    - 8
    SUBSET_PATH: kabr/KABR/annotation/classes.txt
    TOPK: 3
  # Empty string; the fallback location is chosen by the consumer and is not
  # shown here.
  LOG_DIR: ''
  MODEL_VIS:
    ACTIVATIONS: true
    COLORMAP: Pastel2
    ENABLE: true
    GRAD_CAM:
      COLORMAP: viridis
      ENABLE: true
      # Same single layer name as MODEL_VIS.LAYER_LIST below.
      LAYER_LIST:
      - s5/pathway0_res14
      USE_TRUE_LABEL: false
    INPUT_VIDEO: true
    # NOTE(review): this name must match a layer of the instantiated model —
    # verify against the actual network.
    LAYER_LIST:
    - s5/pathway0_res14
    MODEL_WEIGHTS: true
    TOPK_PREDS: 1
  PREDICTIONS_PATH: ''
  WRONG_PRED_VIS:
    ENABLE: false
    SUBSET_PATH: ''
    TAG: Incorrectly classified videos.
# Test-phase options. ENABLE is false below, so testing is presumably skipped
# for this run — TODO confirm.
TEST:
  BATCH_SIZE: 64
  CHECKPOINT_FILE_PATH: ''
  CHECKPOINT_TYPE: pytorch
  # NOTE(review): the dataset name is "charades" although the data paths in
  # this file point at kabr/KABR; presumably this selects a loader/format
  # rather than the data itself — confirm.
  DATASET: charades
  ENABLE: false
  NUM_ENSEMBLE_VIEWS: 2
  NUM_SPATIAL_CROPS: 1
  NUM_TEMPORAL_CLIPS: []
  # Located inside the top-level OUTPUT_DIR (kabr/KABR/logs/x3d-l-kabr).
  SAVE_RESULTS_PATH: kabr/KABR/logs/x3d-l-kabr/results.txt
# Training-phase options. ENABLE is true; training starts from the checkpoint
# file named below, with batch size 64.
TRAIN:
  AUTO_RESUME: true
  BATCH_SIZE: 64
  CHECKPOINT_CLEAR_NAME_PATTERN: []
  CHECKPOINT_EPOCH_RESET: true
  # Relative path to an X3D-L weights file; the base directory is not shown in
  # this file.
  CHECKPOINT_FILE_PATH: slowfast/projects/x3d/x3d_l.pyth
  CHECKPOINT_INFLATE: false
  CHECKPOINT_IN_INIT: false
  # Same value as EVAL_PERIOD below (units presumably epochs — verify).
  CHECKPOINT_PERIOD: 5
  CHECKPOINT_TYPE: pytorch
  # NOTE(review): "charades" although the data paths in this file point at
  # kabr/KABR; presumably this selects a loader/format — confirm.
  DATASET: charades
  ENABLE: true
  EVAL_PERIOD: 5
  KILL_LOSS_EXPLOSION_FACTOR: 0.0
  MIXED_PRECISION: false
# Mask-visualization switch; disabled in this file.
VIS_MASK:
  ENABLE: false
# X3D-specific scaling options; this is the section matching MODEL.ARCH: x3d.
# NOTE(review): the factor values presumably correspond to the X3D-L variant
# (OUTPUT_DIR and the checkpoint file name both mention "x3d-l"/"x3d_l") —
# confirm against the reference configuration for that variant.
X3D:
  BN_LIN5: false
  BOTTLENECK_FACTOR: 2.25
  # Mixed-case key ("3x3x3"); it is a lookup name — keep the case as-is.
  CHANNELWISE_3x3x3: true
  DEPTH_FACTOR: 5.0
  DIM_C1: 12
  DIM_C5: 2048
  SCALE_RES2: false
  WIDTH_FACTOR: 2.0