zhb10086 committed
Commit 47fb5bc · verified · 1 Parent(s): bee9bb3

Upload 7 files
20240922_172907.log.json ADDED
The diff for this file is too large to render. See raw diff
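
This log comes from the TextLoggerHook configured in the training config below, which writes one JSON object per line. A minimal sketch for skimming the training-loss records, assuming that standard MMDetection log.json layout (a header record first, then records carrying "mode", "epoch", "iter", and loss values; the exact keys are not guaranteed here):

import json

# Skim training records from the uploaded log. Assumes one JSON object per
# line; the first line is usually an env/config header without a "mode" key.
with open('20240922_172907.log.json') as f:
    for line in f:
        record = json.loads(line)
        if record.get('mode') == 'train':
            print(record.get('epoch'), record.get('iter'), record.get('loss'))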
 
epoch_16.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff785b432141af48e0a3c4863bd9d5218c2d82245c06278ff1fdd332f4a32b3e
+size 909495892
epoch_17.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5591b252f23e878fe6ccb035efb7cf02d846053c3f6f541fd22f5417a558f8b6
+size 909495892
epoch_18.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15ce75e488490ba524256adc604b0ee3742f45349ce9f82a205f626beb428bd5
+size 909495892
epoch_19.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74507a15e1c4f71c6c10eb41384a89e71a9786433f93b62b716e1d6003d8d353
+size 909495892
epoch_20.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52703650ba2d116e2dfa9d67d54a7de2c4fc3001b63bdcf7a664a9dad60a4cb8
+size 909495892
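
Each .pth entry above is a Git LFS pointer (version, oid, size), not the checkpoint itself; the actual ~909 MB (909,495,892-byte) files are fetched with git lfs pull or the Hub download APIs. A short stdlib-only sketch for verifying a downloaded checkpoint against its pointer, here using the epoch_20.pth values copied from above:

import hashlib
import os

def verify_lfs_object(path, expected_oid, expected_size):
    # Compare a downloaded file against the sha256 oid and byte size
    # recorded in its Git LFS pointer.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size taken from the epoch_20.pth pointer above.
print(verify_lfs_object(
    'epoch_20.pth',
    '52703650ba2d116e2dfa9d67d54a7de2c4fc3001b63bdcf7a664a9dad60a4cb8',
    909495892))
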
relation_afford_r101_caffe_c4_1x_regrad_vmrd_metagraspnet_vrd_vg_class_agnostic_2.py ADDED
@@ -0,0 +1,1070 @@
+norm_cfg = dict(
+    type='BN',
+    requires_grad=False,
+    mean=[123.675, 116.28, 103.53],
+    std=[1.0, 1.0, 1.0],
+    to_rgb=True)
+model = dict(
+    type='FasterRCNNRelAfford',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=101,
+        num_stages=3,
+        strides=(1, 2, 2),
+        dilations=(1, 1, 1),
+        out_indices=(2, ),
+        frozen_stages=1,
+        norm_cfg=dict(type='BN', requires_grad=False),
+        norm_eval=True,
+        style='caffe',
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='open-mmlab://detectron2/resnet101_caffe')),
+    rpn_head=dict(
+        type='mmdet.RPNHead',
+        in_channels=1024,
+        feat_channels=1024,
+        anchor_generator=dict(
+            type='AnchorGenerator',
+            scales=[8, 16, 32],
+            ratios=[0.33, 0.5, 1.0, 2.0, 3.0],
+            strides=[16]),
+        bbox_coder=dict(
+            type='DeltaXYWHBBoxCoder',
+            target_means=[0.0, 0.0, 0.0, 0.0],
+            target_stds=[1.0, 1.0, 1.0, 1.0]),
+        loss_cls=dict(
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
+    roi_head=None,
+    child_head=dict(
+        type='invigorate.PairedRoIHead',
+        shared_head=dict(
+            type='invigorate.PairedResLayer',
+            depth=50,
+            stage=3,
+            stride=1,
+            style='caffe',
+            norm_eval=False,
+            share_weights=False),
+        paired_roi_extractor=dict(
+            type='invigorate.VMRNPairedRoIExtractor',
+            roi_layer=dict(type='RoIPool', output_size=7),
+            out_channels=1024,
+            featmap_strides=[16]),
+        relation_head=dict(
+            type='invigorate.BBoxPairHead',
+            with_avg_pool=True,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_relations=1,
+            loss_cls=dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0))),
+    leaf_head=dict(
+        type='mmdet.StandardRoIHead',
+        shared_head=dict(
+            type='mmdet.ResLayer',
+            depth=50,
+            stage=3,
+            stride=1,
+            style='caffe',
+            norm_cfg=dict(type='BN', requires_grad=False),
+            norm_eval=True),
+        bbox_roi_extractor=dict(
+            type='mmdet.SingleRoIExtractor',
+            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+            out_channels=1024,
+            featmap_strides=[16]),
+        bbox_head=dict(
+            type='mmdet.BBoxHead',
+            with_avg_pool=True,
+            with_reg=False,
+            roi_feat_size=7,
+            in_channels=2048,
+            num_classes=2,
+            loss_cls=dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0))),
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False),
+            allowed_border=0,
+            pos_weight=-1,
+            debug=False),
+        rpn_proposal=dict(
+            nms_pre=12000,
+            max_per_img=2000,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=256,
+                pos_fraction=0.25,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=True),
+            pos_weight=-1,
+            debug=False),
+        child_head=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.7,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            relation_sampler=dict(
+                type='RandomRelationSampler',
+                num=32,
+                pos_fraction=0.5,
+                cls_ratio_ub=1.0,
+                add_gt_as_proposals=True,
+                num_relation_cls=1,
+                neg_id=0),
+            pos_weight=-1,
+            online_data=True,
+            online_start_iteration=0),
+        leaf_head=dict(
+            assigner=dict(
+                type='MaxIoUAssigner',
+                pos_iou_thr=0.5,
+                neg_iou_thr=0.5,
+                min_pos_iou=0.5,
+                match_low_quality=False,
+                ignore_iof_thr=-1),
+            sampler=dict(
+                type='RandomSampler',
+                num=64,
+                pos_fraction=0.25,
+                neg_pos_ub=3.0,
+                add_gt_as_proposals=True),
+            pos_weight=-1,
+            debug=False)),
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=6000,
+            max_per_img=300,
+            nms=dict(type='nms', iou_threshold=0.7),
+            min_bbox_size=0),
+        rcnn=dict(
+            score_thr=0.05,
+            nms=dict(type='nms', iou_threshold=0.3),
+            max_per_img=300),
+        child_head=dict(
+            bbox_score_thr=0.5, verbose_relation=False, average_scores=False),
+        leaf_head=dict(score_thr=0.05, nms=None, max_per_img=300)))
+dataset_type = 'REGRADAffordDataset'
+data_root = 'data/regrad/'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[1.0, 1.0, 1.0], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(
+        type='LoadAnnotationsCustom',
+        keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(
+        type='RandomCrop', crop_type='random_keep', allow_negative_crop=False),
+    dict(type='Expand', mean=[123.675, 116.28, 103.53], ratio_range=(1, 2)),
+    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+    dict(
+        type='Normalize',
+        mean=[123.675, 116.28, 103.53],
+        std=[1.0, 1.0, 1.0],
+        to_rgb=True),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='DefaultFormatBundleCustom',
+        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+              'gt_relleaves']),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadRelationProposals'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1000, 600),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(
+                type='Normalize',
+                mean=[123.675, 116.28, 103.53],
+                std=[1.0, 1.0, 1.0],
+                to_rgb=True),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img', 'relation_proposals'])
+        ])
+]
+data = dict(
+    train=dict(
+        _delete_=True,
+        type='ConcatDataset',
+        datasets=[
+            dict(
+                type='REGRADAffordDataset',
+                data_root='data/regrad/',
+                meta_info_file='dataset_train_5k/meta_infos.json',
+                ann_file='dataset_train_5k/objects.json',
+                img_prefix='dataset_train_5k/RGBImages',
+                seg_prefix='dataset_train_5k/SegmentationImages',
+                depth_prefix='dataset_train_5k/DepthImages',
+                pipeline=[
+                    dict(type='LoadImageFromFile', to_float32=True),
+                    dict(
+                        type='LoadAnnotationsCustom',
+                        keys=[
+                            'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(type='RandomFlip', flip_ratio=0.5),
+                    dict(type='PhotoMetricDistortion'),
+                    dict(
+                        type='RandomCrop',
+                        crop_type='random_keep',
+                        allow_negative_crop=False),
+                    dict(type='Expand', mean=[123.675, 116.28, 103.53]),
+                    dict(
+                        type='Resize', img_scale=(1000, 600), keep_ratio=True),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[1.0, 1.0, 1.0],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(
+                        type='DefaultFormatBundleCustom',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(
+                        type='Collect',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ])
+                ],
+                min_pos_relation=1,
+                class_agnostic=True),
+            dict(
+                type='MetaGraspNetAffordDataset',
+                data_root='data/metagraspnet/sim/',
+                meta_info_file='meta_infos_train.json',
+                pipeline=[
+                    dict(type='LoadImageFromFile', to_float32=True),
+                    dict(
+                        type='LoadAnnotationsCustom',
+                        keys=[
+                            'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(type='RandomFlip', flip_ratio=0.5),
+                    dict(type='PhotoMetricDistortion'),
+                    dict(
+                        type='RandomCrop',
+                        crop_type='random_keep',
+                        allow_negative_crop=False),
+                    dict(
+                        type='Expand',
+                        mean=[123.675, 116.28, 103.53],
+                        ratio_range=(1, 2)),
+                    dict(
+                        type='Resize', img_scale=(1000, 600), keep_ratio=True),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[1.0, 1.0, 1.0],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(
+                        type='DefaultFormatBundleCustom',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(
+                        type='Collect',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ])
+                ],
+                min_pos_relation=1,
+                class_agnostic=True),
+            dict(
+                type='VMRDAffordDataset',
+                ann_file='data/vmrd/ImageSets/Main/trainval.txt',
+                img_prefix='data/vmrd/',
+                pipeline=[
+                    dict(type='LoadImageFromFile', to_float32=True),
+                    dict(
+                        type='LoadAnnotationsCustom',
+                        keys=[
+                            'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(type='RandomFlip', flip_ratio=0.5),
+                    dict(type='PhotoMetricDistortion'),
+                    dict(type='Expand', mean=[123.675, 116.28, 103.53]),
+                    dict(
+                        type='Resize', img_scale=(1000, 600), keep_ratio=True),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[1.0, 1.0, 1.0],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(
+                        type='DefaultFormatBundleCustom',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(
+                        type='Collect',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ])
+                ],
+                class_agnostic=True),
+            dict(
+                type='VRDAffordDataset',
+                data_root='data/vrd/',
+                ann_file='sg_dataset/sg_train_annotations.json',
+                img_prefix='sg_dataset/sg_train_images/',
+                pipeline=[
+                    dict(type='LoadImageFromFile', to_float32=True),
+                    dict(
+                        type='LoadAnnotationsCustom',
+                        keys=[
+                            'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(type='RandomFlip', flip_ratio=0.5),
+                    dict(
+                        type='Resize', img_scale=(1000, 600), keep_ratio=True),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[1.0, 1.0, 1.0],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(
+                        type='DefaultFormatBundleCustom',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(
+                        type='Collect',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ])
+                ],
+                class_agnostic=True),
+            dict(
+                type='VGAffordDataset',
+                data_root='data/vg/downloads',
+                ann_file='relationships.json',
+                img_prefix='',
+                pipeline=[
+                    dict(type='LoadImageFromFile', to_float32=True),
+                    dict(
+                        type='LoadAnnotationsCustom',
+                        keys=[
+                            'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(type='RandomFlip', flip_ratio=0.5),
+                    dict(
+                        type='Resize', img_scale=(1000, 600), keep_ratio=True),
+                    dict(
+                        type='Normalize',
+                        mean=[123.675, 116.28, 103.53],
+                        std=[1.0, 1.0, 1.0],
+                        to_rgb=True),
+                    dict(type='Pad', size_divisor=32),
+                    dict(
+                        type='DefaultFormatBundleCustom',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ]),
+                    dict(
+                        type='Collect',
+                        keys=[
+                            'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+                            'gt_relleaves'
+                        ])
+                ],
+                class_agnostic=True)
+        ],
+        separate_eval=True,
+        class_agnostic=True),
+    val=dict(
+        _delete_=True,
+        type='ConcatDataset',
+        datasets=[
+            dict(
+                type='REGRADAffordDataset',
+                data_root='data/regrad/',
+                using_depth=False,
+                using_gt_proposals=True,
+                meta_info_file='dataset_seen_val_1k/meta_infos.json',
+                ann_file='dataset_seen_val_1k/objects.json',
+                img_prefix='dataset_seen_val_1k/RGBImages',
+                seg_prefix='dataset_seen_val_1k/SegmentationImages',
+                depth_prefix='dataset_seen_val_1k/DepthImages',
+                test_mode=True,
+                pipeline=[
+                    dict(type='LoadImageFromFile'),
+                    dict(type='LoadRelationProposals'),
+                    dict(
+                        type='MultiScaleFlipAug',
+                        img_scale=(1000, 600),
+                        flip=False,
+                        transforms=[
+                            dict(type='Resize', keep_ratio=True),
+                            dict(
+                                type='Normalize',
+                                mean=[123.675, 116.28, 103.53],
+                                std=[1.0, 1.0, 1.0],
+                                to_rgb=True),
+                            dict(type='Pad', size_divisor=32),
+                            dict(type='ImageToTensor', keys=['img']),
+                            dict(
+                                type='Collect',
+                                keys=['img', 'relation_proposals'])
+                        ])
+                ],
+                class_agnostic=True,
+                max_sample_num=1000),
+            dict(
+                type='VMRDAffordDataset',
+                ann_file='data/vmrd/ImageSets/Main/test.txt',
+                img_prefix='data/vmrd/',
+                using_gt_proposals=True,
+                pipeline=[
+                    dict(type='LoadImageFromFile'),
+                    dict(type='LoadRelationProposals'),
+                    dict(
+                        type='MultiScaleFlipAug',
+                        img_scale=(1000, 600),
+                        flip=False,
+                        transforms=[
+                            dict(type='Resize', keep_ratio=True),
+                            dict(
+                                type='Normalize',
+                                mean=[123.675, 116.28, 103.53],
+                                std=[1.0, 1.0, 1.0],
+                                to_rgb=True),
+                            dict(type='Pad', size_divisor=32),
+                            dict(type='ImageToTensor', keys=['img']),
+                            dict(
+                                type='Collect',
+                                keys=['img', 'relation_proposals'])
+                        ])
+                ],
+                class_agnostic=True)
+        ],
+        separate_eval=True,
+        class_agnostic=True),
+    test=dict(
+        _delete_=True,
+        type='ConcatDataset',
+        datasets=[
+            dict(
+                type='REGRADAffordDataset',
+                data_root='data/regrad/',
+                using_depth=False,
+                using_gt_proposals=True,
+                meta_info_file='dataset_seen_val_1k/meta_infos.json',
+                ann_file='dataset_seen_val_1k/objects.json',
+                img_prefix='dataset_seen_val_1k/RGBImages',
+                seg_prefix='dataset_seen_val_1k/SegmentationImages',
+                depth_prefix='dataset_seen_val_1k/DepthImages',
+                test_mode=True,
+                pipeline=[
+                    dict(type='LoadImageFromFile'),
+                    dict(type='LoadRelationProposals'),
+                    dict(
+                        type='MultiScaleFlipAug',
+                        img_scale=(1000, 600),
+                        flip=False,
+                        transforms=[
+                            dict(type='Resize', keep_ratio=True),
+                            dict(
+                                type='Normalize',
+                                mean=[123.675, 116.28, 103.53],
+                                std=[1.0, 1.0, 1.0],
+                                to_rgb=True),
+                            dict(type='Pad', size_divisor=32),
+                            dict(type='ImageToTensor', keys=['img']),
+                            dict(
+                                type='Collect',
+                                keys=['img', 'relation_proposals'])
+                        ])
+                ],
+                class_agnostic=True,
+                max_sample_num=1000),
+            dict(
+                type='VMRDAffordDataset',
+                ann_file='data/vmrd/ImageSets/Main/test.txt',
+                img_prefix='data/vmrd/',
+                using_gt_proposals=True,
+                pipeline=[
+                    dict(type='LoadImageFromFile'),
+                    dict(type='LoadRelationProposals'),
+                    dict(
+                        type='MultiScaleFlipAug',
+                        img_scale=(1000, 600),
+                        flip=False,
+                        transforms=[
+                            dict(type='Resize', keep_ratio=True),
+                            dict(
+                                type='Normalize',
+                                mean=[123.675, 116.28, 103.53],
+                                std=[1.0, 1.0, 1.0],
+                                to_rgb=True),
+                            dict(type='Pad', size_divisor=32),
+                            dict(type='ImageToTensor', keys=['img']),
+                            dict(
+                                type='Collect',
+                                keys=['img', 'relation_proposals'])
+                        ])
+                ],
+                class_agnostic=True)
+        ],
+        separate_eval=True,
+        class_agnostic=True),
+    samples_per_gpu=4,
+    workers_per_gpu=2)
+evaluation = dict(interval=1, metric=['mAP', 'ImgAcc'])
+optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
+optimizer_config = dict(grad_clip=dict(max_norm=100, norm_type=2))
+lr_config = dict(
+    policy='step',
+    warmup='linear',
+    warmup_iters=4000,
+    warmup_ratio=0.001,
+    step=[12, 18])
+runner = dict(type='EpochBasedRunner', max_epochs=20)
+checkpoint_config = dict(interval=1, max_keep_ckpts=5)
+log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = 'work_dirs/relation_afford_r101_caffe_c4_1x_regrad_vmrd_metagraspnet_vrd_vg_class_agnostic_2/latest.pth'
+workflow = [('train', 1)]
+opencv_num_threads = 0
+mp_start_method = 'fork'
+auto_scale_lr = dict(enable=False, base_batch_size=16)
+mmdet = None
+mmdet_root = '/data/home/hanbo/projects/cloud_services/service/vmrn/vmrn_models/mmdetection/mmdet'
+test_with_object_detector = False
+test_crop_config = (174, 79, 462, 372)
+kinect_img_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadRelationProposals'),
+    dict(
+        type='FixedCrop',
+        crop_type='absolute',
+        top_left=(174, 79),
+        bottom_right=(462, 372)),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1000, 600),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(
+                type='Normalize',
+                mean=[123.675, 116.28, 103.53],
+                std=[1.0, 1.0, 1.0],
+                to_rgb=True),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img', 'relation_proposals'])
+        ])
+]
+seen_val_dataset = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    using_depth=False,
+    using_gt_proposals=True,
+    meta_info_file='dataset_seen_val_1k/meta_infos.json',
+    ann_file='dataset_seen_val_1k/objects.json',
+    img_prefix='dataset_seen_val_1k/RGBImages',
+    seg_prefix='dataset_seen_val_1k/SegmentationImages',
+    depth_prefix='dataset_seen_val_1k/DepthImages',
+    test_mode=True,
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True,
+    max_sample_num=1000)
+unseen_val_dataset = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    using_depth=False,
+    using_gt_proposals=True,
+    meta_info_file='dataset_unseen_val_1k/meta_infos.json',
+    ann_file='dataset_unseen_val_1k/objects.json',
+    img_prefix='dataset_unseen_val_1k/RGBImages',
+    seg_prefix='dataset_unseen_val_1k/SegmentationImages',
+    depth_prefix='dataset_unseen_val_1k/DepthImages',
+    test_mode=True,
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True,
+    max_sample_num=1000)
+real_val_dataset = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    using_depth=False,
+    using_gt_proposals=True,
+    meta_info_file='real/meta_infos.json',
+    ann_file='real/objects.json',
+    img_prefix='real/RGBImages',
+    img_suffix='png',
+    depth_prefix='real/DepthImages',
+    test_mode=True,
+    test_gt_bbox_offset=(174, 79),
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='FixedCrop',
+            crop_type='absolute',
+            top_left=(174, 79),
+            bottom_right=(462, 372)),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True)
+regrad_datatype = 'REGRADAffordDataset'
+regrad_root = 'data/regrad/'
+vmrd_datatype = 'VMRDAffordDataset'
+vmrd_root = 'data/vmrd/'
+vmrd_train = dict(
+    type='VMRDAffordDataset',
+    ann_file='data/vmrd/ImageSets/Main/trainval.txt',
+    img_prefix='data/vmrd/',
+    pipeline=[
+        dict(type='LoadImageFromFile', to_float32=True),
+        dict(
+            type='LoadAnnotationsCustom',
+            keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='PhotoMetricDistortion'),
+        dict(type='Expand', mean=[123.675, 116.28, 103.53]),
+        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+        dict(
+            type='Normalize',
+            mean=[123.675, 116.28, 103.53],
+            std=[1.0, 1.0, 1.0],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(
+            type='DefaultFormatBundleCustom',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ]),
+        dict(
+            type='Collect',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ])
+    ],
+    class_agnostic=True)
+regrad_train = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    meta_info_file='dataset_train_5k/meta_infos.json',
+    ann_file='dataset_train_5k/objects.json',
+    img_prefix='dataset_train_5k/RGBImages',
+    seg_prefix='dataset_train_5k/SegmentationImages',
+    depth_prefix='dataset_train_5k/DepthImages',
+    pipeline=[
+        dict(type='LoadImageFromFile', to_float32=True),
+        dict(
+            type='LoadAnnotationsCustom',
+            keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='PhotoMetricDistortion'),
+        dict(
+            type='RandomCrop',
+            crop_type='random_keep',
+            allow_negative_crop=False),
+        dict(type='Expand', mean=[123.675, 116.28, 103.53]),
+        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+        dict(
+            type='Normalize',
+            mean=[123.675, 116.28, 103.53],
+            std=[1.0, 1.0, 1.0],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(
+            type='DefaultFormatBundleCustom',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ]),
+        dict(
+            type='Collect',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ])
+    ],
+    min_pos_relation=1,
+    class_agnostic=True)
+metagraspnet_sim_train = dict(
+    type='MetaGraspNetAffordDataset',
+    data_root='data/metagraspnet/sim/',
+    meta_info_file='meta_infos_train.json',
+    pipeline=[
+        dict(type='LoadImageFromFile', to_float32=True),
+        dict(
+            type='LoadAnnotationsCustom',
+            keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='PhotoMetricDistortion'),
+        dict(
+            type='RandomCrop',
+            crop_type='random_keep',
+            allow_negative_crop=False),
+        dict(
+            type='Expand', mean=[123.675, 116.28, 103.53], ratio_range=(1, 2)),
+        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+        dict(
+            type='Normalize',
+            mean=[123.675, 116.28, 103.53],
+            std=[1.0, 1.0, 1.0],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(
+            type='DefaultFormatBundleCustom',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ]),
+        dict(
+            type='Collect',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ])
+    ],
+    min_pos_relation=1,
+    class_agnostic=True)
+vgvrd_train_pipeline = [
+    dict(type='LoadImageFromFile', to_float32=True),
+    dict(
+        type='LoadAnnotationsCustom',
+        keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+    dict(
+        type='Normalize',
+        mean=[123.675, 116.28, 103.53],
+        std=[1.0, 1.0, 1.0],
+        to_rgb=True),
+    dict(type='Pad', size_divisor=32),
+    dict(
+        type='DefaultFormatBundleCustom',
+        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_relchilds',
+              'gt_relleaves']),
+    dict(
+        type='Collect',
+        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'])
+]
+vrd_train = dict(
+    type='VRDAffordDataset',
+    data_root='data/vrd/',
+    ann_file='sg_dataset/sg_train_annotations.json',
+    img_prefix='sg_dataset/sg_train_images/',
+    pipeline=[
+        dict(type='LoadImageFromFile', to_float32=True),
+        dict(
+            type='LoadAnnotationsCustom',
+            keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+        dict(
+            type='Normalize',
+            mean=[123.675, 116.28, 103.53],
+            std=[1.0, 1.0, 1.0],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(
+            type='DefaultFormatBundleCustom',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ]),
+        dict(
+            type='Collect',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ])
+    ],
+    class_agnostic=True)
+vg_train = dict(
+    type='VGAffordDataset',
+    data_root='data/vg/downloads',
+    ann_file='relationships.json',
+    img_prefix='',
+    pipeline=[
+        dict(type='LoadImageFromFile', to_float32=True),
+        dict(
+            type='LoadAnnotationsCustom',
+            keys=['gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves']),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
+        dict(
+            type='Normalize',
+            mean=[123.675, 116.28, 103.53],
+            std=[1.0, 1.0, 1.0],
+            to_rgb=True),
+        dict(type='Pad', size_divisor=32),
+        dict(
+            type='DefaultFormatBundleCustom',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ]),
+        dict(
+            type='Collect',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'gt_relchilds', 'gt_relleaves'
+            ])
+    ],
+    class_agnostic=True)
+real_test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadRelationProposals'),
+    dict(
+        type='FixedCrop',
+        crop_type='absolute',
+        top_left=(174, 79),
+        bottom_right=(462, 372)),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(1000, 600),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(
+                type='Normalize',
+                mean=[123.675, 116.28, 103.53],
+                std=[1.0, 1.0, 1.0],
+                to_rgb=True),
+            dict(type='Pad', size_divisor=32),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img', 'relation_proposals'])
+        ])
+]
+regrad_seen_val_dataset = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    using_depth=False,
+    using_gt_proposals=True,
+    meta_info_file='dataset_seen_val_1k/meta_infos.json',
+    ann_file='dataset_seen_val_1k/objects.json',
+    img_prefix='dataset_seen_val_1k/RGBImages',
+    seg_prefix='dataset_seen_val_1k/SegmentationImages',
+    depth_prefix='dataset_seen_val_1k/DepthImages',
+    test_mode=True,
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True,
+    max_sample_num=1000)
+regrad_unseen_val_dataset = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    using_depth=False,
+    using_gt_proposals=True,
+    meta_info_file='dataset_unseen_val_1k/meta_infos.json',
+    ann_file='dataset_unseen_val_1k/objects.json',
+    img_prefix='dataset_unseen_val_1k/RGBImages',
+    seg_prefix='dataset_unseen_val_1k/SegmentationImages',
+    depth_prefix='dataset_unseen_val_1k/DepthImages',
+    test_mode=True,
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True,
+    max_sample_num=1000)
+regrad_real_val_dataset = dict(
+    type='REGRADAffordDataset',
+    data_root='data/regrad/',
+    using_depth=False,
+    using_gt_proposals=True,
+    meta_info_file='real/meta_infos.json',
+    ann_file='real/objects.json',
+    img_prefix='real/RGBImages',
+    img_suffix='png',
+    depth_prefix='real/DepthImages',
+    test_mode=True,
+    test_gt_bbox_offset=(174, 79),
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='FixedCrop',
+            crop_type='absolute',
+            top_left=(174, 79),
+            bottom_right=(462, 372)),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True)
+vmrd_val_dataset = dict(
+    type='VMRDAffordDataset',
+    ann_file='data/vmrd/ImageSets/Main/test.txt',
+    img_prefix='data/vmrd/',
+    using_gt_proposals=True,
+    pipeline=[
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadRelationProposals'),
+        dict(
+            type='MultiScaleFlipAug',
+            img_scale=(1000, 600),
+            flip=False,
+            transforms=[
+                dict(type='Resize', keep_ratio=True),
+                dict(
+                    type='Normalize',
+                    mean=[123.675, 116.28, 103.53],
+                    std=[1.0, 1.0, 1.0],
+                    to_rgb=True),
+                dict(type='Pad', size_divisor=32),
+                dict(type='ImageToTensor', keys=['img']),
+                dict(type='Collect', keys=['img', 'relation_proposals'])
+            ])
+    ],
+    class_agnostic=True)
+train_sampler = dict(
+    type='DistributedWeightedSampler',
+    weights=[0.15, 0.15, 0.1, 0.05, 0.55],
+    sample_per_epoch=150000,
+    shuffle=True)
+work_dir = './work_dirs/relation_afford_r101_caffe_c4_1x_regrad_vmrd_metagraspnet_vrd_vg_class_agnostic_2'
+gpu_ids = range(0, 8)
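
The config targets the mmcv 1.x / mmdet 2.x API family (EpochBasedRunner, Config.fromfile). A minimal sketch of loading it and pointing it at one of the uploaded checkpoints; note that actually building the model requires the custom invigorate/mmdet extensions referenced above (FasterRCNNRelAfford, PairedRoIHead, the *AffordDataset classes) to be registered, which are not part of stock mmdet:

from mmcv import Config

cfg = Config.fromfile(
    'relation_afford_r101_caffe_c4_1x_regrad_vmrd_metagraspnet_vrd_vg'
    '_class_agnostic_2.py')
cfg.load_from = 'epoch_20.pth'  # start from the uploaded checkpoint
cfg.resume_from = None          # drop the hard-coded work_dirs resume path
cfg.gpu_ids = range(1)          # override the 8-GPU default for a local run
print(cfg.model.type, cfg.runner.max_epochs)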