Hello everyone!
This is my first topic and first post, so if I miss some explanation, I will update it as soon as possible.
I am fine-tuning a MaskFormer model with a custom COCO dataset. I run into an issue during the forward call that computes the loss (the get_loss_dict function).
Some of my pictures do not raise any error and some of them raise the error below. I don't understand why.
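For context, the processor and the model are created roughly like this (a sketch; the checkpoint name is just an example, and I pass the id2label/label2id mappings built by the dataset shown below):

from transformers import MaskFormerForInstanceSegmentation, MaskFormerImageProcessor

checkpoint = "facebook/maskformer-swin-base-coco"  # example checkpoint, not necessarily the one I use
processor = MaskFormerImageProcessor.from_pretrained(checkpoint)
model = MaskFormerForInstanceSegmentation.from_pretrained(
    checkpoint,
    id2label=dataset.id2label,        # mappings built in the dataset class below
    label2id=dataset.label2id,
    ignore_mismatched_sizes=True,     # class head is re-initialized for my custom classes
)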
Here is the code of the dataset:
import json
import os

import imantics
import numpy as np
import torch
from PIL import Image


class CocoInstanceDataset(torch.utils.data.Dataset):
    def __init__(self, ann_file, img_folder, processor=None, transform=None,
                 increment_instance_ids: bool = True, increment_class_ids: bool = True):
        with open(ann_file, 'r') as f:
            self.coco = json.load(f)
        self.coco['images'] = sorted(self.coco['images'], key=lambda x: x['id'])
        self.img_folder = img_folder
        self.processor = processor
        self.transform = transform
        # booleans used as 0/1 offsets on the instance / class ids below
        self.increment_instance_ids = increment_instance_ids
        self.increment_class_ids = increment_class_ids
        self.id2label = {cat['id']: cat['name'] for cat in self.coco['categories']}
        self.label2id = {cat['name']: cat['id'] for cat in self.coco['categories']}

    def get_mask_from_segmentation(self, segmentation, instance_id, width, height):
        # rasterize the COCO polygon(s) and tag every foreground pixel with the instance id
        mask = imantics.Polygons(segmentation).mask(width=width, height=height).array.astype(np.int64)
        mask[mask > 0] = instance_id
        if mask.sum() > 0:
            return mask
        return None  # empty mask -> this annotation is skipped

    def __getitem__(self, idx):
        # get image & image infos
        img_path = os.path.join(self.img_folder, self.coco['images'][idx]['file_name'])
        image = np.array(Image.open(img_path))  # .convert('RGB')).transpose(2, 0, 1)  # see if convert RGB is needed
        # get annotations
        annotations = [ann for ann in self.coco['annotations'] if ann['image_id'] == self.coco['images'][idx]['id']]
        masks = []
        for i, ann in enumerate(annotations):
            mask = self.get_mask_from_segmentation(segmentation=ann['segmentation'],
                                                   instance_id=i + self.increment_instance_ids,
                                                   width=self.coco['images'][idx]['width'],
                                                   height=self.coco['images'][idx]['height'])
            if mask is not None:
                masks.append(mask)
        instance_id_2_category_id = {i + self.increment_instance_ids: ann['category_id'] + self.increment_class_ids
                                     for i, ann in enumerate(annotations)}
        # apply transformation
        if self.transform is not None:
            transformed = self.transform(image=image, masks=masks)
            image, masks = transformed['image'], transformed['masks']
        # convert to C, H, W
        # image = image.transpose(2, 0, 1)
        # tweak to process overlapping masks: call the processor once per mask and collect the labels
        mask_labels = []
        class_labels = []
        for i, mask in enumerate(masks):
            encoding = self.processor(images=[image], segmentation_maps=[mask],
                                      instance_id_to_semantic_id=instance_id_2_category_id,
                                      return_tensors="pt")
            if i == 0:
                # pixel_values / pixel_mask are identical for every call, keep the first ones
                pixel_values = encoding['pixel_values']
                pixel_mask = encoding['pixel_mask']
            mask_labels.append(encoding['mask_labels'][0])
            class_labels.append(encoding['class_labels'][0])
        inputs = {'pixel_values': pixel_values,
                  'pixel_mask': pixel_mask,
                  'mask_labels': torch.stack(mask_labels).squeeze(),
                  'class_labels': torch.stack(class_labels).squeeze()}
        inputs = {k: v.squeeze() if isinstance(v, torch.Tensor) else v[0] for k, v in inputs.items()}
        if len(class_labels) == 1:
            inputs["class_labels"] = torch.stack(class_labels).squeeze(dim=1)
        return inputs

    def __len__(self):
        return len(self.coco['images'])
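For the DataLoader I use a collate function along these lines (a sketch: mask_labels and class_labels contain a different number of instances per image, so they have to stay as lists of tensors rather than being stacked):

from torch.utils.data import DataLoader

def collate_fn(examples):
    # stack the fixed-size image tensors, keep the variable-length labels as lists
    return {
        "pixel_values": torch.stack([ex["pixel_values"] for ex in examples]),
        "pixel_mask": torch.stack([ex["pixel_mask"] for ex in examples]),
        "mask_labels": [ex["mask_labels"] for ex in examples],
        "class_labels": [ex["class_labels"] for ex in examples],
    }

train_dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)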
Below is the model call to get the loss:
outputs = model(
    pixel_values=batch["pixel_values"],
    mask_labels=batch["mask_labels"],
    class_labels=batch["class_labels"],
)
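The loss is then read from the output and back-propagated, roughly like this (a sketch of my training step; the optimizer setup is omitted, `optimizer` is just a placeholder name):

loss = outputs.loss   # MaskFormer computes the loss internally when mask_labels / class_labels are given
loss.backward()
optimizer.step()
optimizer.zero_grad()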
And below is the error trace: