# ---------------------------------------------------------------------------
# Runtime settings
# ---------------------------------------------------------------------------
log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]
checkpoint_config = dict(interval=20)
evaluation = dict(
    interval=25,
    metric=['PCK', 'NME', 'AUC', 'EPE'],
    key_indicator='PCK',
    gpu_collect=True,
    res_folder='',
)

# ---------------------------------------------------------------------------
# Optimisation schedule
# ---------------------------------------------------------------------------
optimizer = dict(type='Adam', lr=1e-05)
optimizer_config = dict(grad_clip=None)
# NOTE(review): both milestones in `step` (160 and 180) are larger than
# `total_epochs` (100). If the step policy counts epochs, the decay would
# never trigger during this run -- confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=0.001,
    step=[160, 180],
)
total_epochs = 100
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
channel_cfg = dict(
    num_output_channels=1,
    dataset_joints=1,
    dataset_channel=[[0]],
    inference_channel=[0],
    max_kpt_num=100,
)
model = dict(
    type='EdgeCape',
    encoder_config=dict(
        type='SwinTransformerV2',
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        window_size=16,
        drop_path_rate=0.2,
        img_size=256,
    ),
    keypoint_head=dict(
        type='TwoStageHead',
        in_channels=768,
        transformer=dict(
            type='TwoStageSupportRefineTransformer',
            d_model=256,
            nhead=8,
            num_encoder_layers=3,
            num_decoder_layers=3,
            dim_feedforward=768,
            dropout=0.1,
            similarity_proj_dim=256,
            dynamic_proj_dim=128,
            activation='relu',
            normalize_before=False,
            return_intermediate_dec=True,
            use_bias_attn_module=True,
            attn_bias=True,
            max_hops=4,
        ),
        share_kpt_branch=False,
        num_decoder_layer=3,
        with_heatmap_loss=False,
        heatmap_loss_weight=2.0,
        skeleton_loss_weight=1.0,
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
        ),
        skeleton_head=dict(type='SkeletonPredictor', learn_skeleton=True),
        learn_skeleton=True,
        masked_supervision=True,
        masking_ratio=0.5,
        model_freeze='skeleton',
    ),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=False,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11,
    ),
    freeze_backbone=True,
)

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
# The channel-related entries are read from `channel_cfg` so the two
# dictionaries cannot drift apart; the resulting values are 1, 1, [[0]], [0].
data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
)

# Training pipeline: the only one that contains the random scale/rotation step.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='TopDownGetRandomScaleRotation',
        rot_factor=15,
        scale_factor=0.15,
    ),
    dict(type='TopDownAffineFewShot'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    dict(type='TopDownGenerateTargetFewShot', sigma=1),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'bbox_score', 'flip_pairs', 'category_id',
            'skeleton',
        ],
    ),
]

# Validation pipeline: same steps as training minus the random augmentation.
valid_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownAffineFewShot'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
    dict(type='TopDownGenerateTargetFewShot', sigma=1),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'bbox_score', 'flip_pairs', 'category_id',
            'skeleton',
        ],
    ),
]

# The test pipeline has exactly the same steps as the validation pipeline.
test_pipeline = valid_pipeline

data_root = 'data/mp100'
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type='TransformerPoseDataset',
        ann_file=f'{data_root}/annotations/mp100_split2_train.json',
        img_prefix=f'{data_root}/images/',
        data_cfg=data_cfg,
        valid_class_ids=None,
        max_kpt_num=channel_cfg['max_kpt_num'],
        num_shots=1,
        pipeline=train_pipeline,
    ),
    val=dict(
        type='TransformerPoseDataset',
        ann_file=f'{data_root}/annotations/mp100_split2_val.json',
        img_prefix=f'{data_root}/images/',
        data_cfg=data_cfg,
        valid_class_ids=None,
        max_kpt_num=channel_cfg['max_kpt_num'],
        num_shots=1,
        num_queries=15,
        num_episodes=100,
        pipeline=valid_pipeline,
    ),
    test=dict(
        type='TestPoseDataset',
        ann_file=f'{data_root}/annotations/mp100_split2_test.json',
        img_prefix=f'{data_root}/images/',
        data_cfg=data_cfg,
        valid_class_ids=None,
        max_kpt_num=channel_cfg['max_kpt_num'],
        num_shots=1,
        num_queries=15,
        num_episodes=200,
        pck_threshold_list=[0.05, 0.1, 0.15, 0.2, 0.25],
        pipeline=test_pipeline,
    ),
)

# ---------------------------------------------------------------------------
# Visualisation and miscellaneous
# ---------------------------------------------------------------------------
vis_backends = [
    dict(type='LocalVisBackend'),
    dict(type='TensorboardVisBackend'),
]
visualizer = dict(
    type='PoseLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)
shuffle_cfg = dict(interval=1)