'''
Dataloader to process the Adobe Image Matting dataset.
Adapted from GCA-Matting (https://github.com/Yaoyi-Li/GCA-Matting/tree/master/dataloader).
'''
import os
import glob
import logging
import os.path as osp
import functools
import numpy as np
import torch
import cv2
import math
import numbers
import random
import pickle
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torchvision import transforms
from easydict import EasyDict
from detectron2.utils.logger import setup_logger
from detectron2.utils import comm
from detectron2.data import build_detection_test_loader
import torchvision.transforms.functional
import json
from PIL import Image
from detectron2.evaluation.evaluator import DatasetEvaluator
from collections import defaultdict
from data.evaluate import compute_sad_loss, compute_mse_loss, compute_mad_loss, compute_gradient_loss, compute_connectivity_error
# Base default config
CONFIG = EasyDict({})
# Model config
CONFIG.model = EasyDict({})
# one-hot or class, choice: [3, 1]
CONFIG.model.trimap_channel = 1
# Dataloader config
CONFIG.data = EasyDict({})
# feed-forward image size (untested)
CONFIG.data.crop_size = 512
# probability of applying the CutMask perturbation
CONFIG.data.cutmask_prob = 0.25
# composition of two foregrounds, affine transform, crop and HSV jitter
CONFIG.data.augmentation = True
CONFIG.data.random_interp = True
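# Example (hypothetical override): these module-level flags are read by the
# transforms below at call time, so switching to a one-hot trimap encoding only
# requires flipping the config value before the dataloaders are built:
#   CONFIG.model.trimap_channel = 3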
class Prefetcher:
    """
    Asynchronously prefetch the next sample to the GPU on a side CUDA stream.
    Call iter() on this object before next(); __iter__ (re)creates the
    underlying iterator and warms up the pipeline.
    Modified from the data_prefetcher in https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py
    """
def __init__(self, loader):
self.orig_loader = loader
self.stream = torch.cuda.Stream()
self.next_sample = None
def preload(self):
try:
self.next_sample = next(self.loader)
except StopIteration:
self.next_sample = None
return
with torch.cuda.stream(self.stream):
for key, value in self.next_sample.items():
if isinstance(value, torch.Tensor):
self.next_sample[key] = value.cuda(non_blocking=True)
def __next__(self):
torch.cuda.current_stream().wait_stream(self.stream)
sample = self.next_sample
if sample is not None:
for key, value in sample.items():
if isinstance(value, torch.Tensor):
sample[key].record_stream(torch.cuda.current_stream())
self.preload()
else:
            # behave like a default DataLoader: signal exhaustion to the caller
            raise StopIteration
return sample
def __iter__(self):
self.loader = iter(self.orig_loader)
self.preload()
return self
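# Usage sketch for Prefetcher (hypothetical names; assumes a CUDA device and an
# existing torch DataLoader named `train_loader`):
#
#   prefetcher = Prefetcher(train_loader)
#   for sample in prefetcher:        # __iter__ resets the underlying iterator
#       images = sample['image']     # tensors are already on the GPU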
class ImageFile(object):
def __init__(self, phase='train'):
self.phase = phase
self.rng = np.random.RandomState(0)
def _get_valid_names(self, *dirs, shuffle=True):
name_sets = [self._get_name_set(d) for d in dirs]
def _join_and(a, b):
return a & b
valid_names = list(functools.reduce(_join_and, name_sets))
if shuffle:
self.rng.shuffle(valid_names)
return valid_names
@staticmethod
def _get_name_set(dir_name):
path_list = glob.glob(os.path.join(dir_name, '*'))
name_set = set()
for path in path_list:
name = os.path.basename(path)
name = os.path.splitext(name)[0]
name_set.add(name)
return name_set
@staticmethod
def _list_abspath(data_dir, ext, data_list):
return [os.path.join(data_dir, name + ext)
for name in data_list]
class ImageFileTrain(ImageFile):
def __init__(
self,
alpha_dir="train_alpha",
fg_dir="train_fg",
bg_dir="train_bg",
alpha_ext=".jpg",
fg_ext=".jpg",
bg_ext=".jpg",
fg_have_bg_num=None,
alpha_ratio_json = None,
alpha_min_ratio = None,
key_sample_ratio = None,
):
super(ImageFileTrain, self).__init__(phase="train")
self.alpha_dir = alpha_dir
self.fg_dir = fg_dir
self.bg_dir = bg_dir
self.alpha_ext = alpha_ext
self.fg_ext = fg_ext
self.bg_ext = bg_ext
logger = setup_logger(name=__name__)
if not isinstance(self.alpha_dir, str):
assert len(self.alpha_dir) == len(self.fg_dir) == len(alpha_ext) == len(fg_ext)
self.valid_fg_list = []
self.alpha = []
self.fg = []
self.key_alpha = []
self.key_fg = []
for i in range(len(self.alpha_dir)):
valid_fg_list = self._get_valid_names(self.fg_dir[i], self.alpha_dir[i])
valid_fg_list.sort()
alpha = self._list_abspath(self.alpha_dir[i], self.alpha_ext[i], valid_fg_list)
fg = self._list_abspath(self.fg_dir[i], self.fg_ext[i], valid_fg_list)
self.valid_fg_list += valid_fg_list
self.alpha += alpha * fg_have_bg_num[i]
self.fg += fg * fg_have_bg_num[i]
if alpha_ratio_json[i] is not None:
tmp_key_alpha = []
tmp_key_fg = []
name_to_alpha_path = dict()
for name in alpha:
name_to_alpha_path[name.split('/')[-1].split('.')[0]] = name
name_to_fg_path = dict()
for name in fg:
name_to_fg_path[name.split('/')[-1].split('.')[0]] = name
with open(alpha_ratio_json[i], 'r') as file:
alpha_ratio_list = json.load(file)
for ratio, name in alpha_ratio_list:
if ratio < alpha_min_ratio[i]:
break
tmp_key_alpha.append(name_to_alpha_path[name.split('.')[0]])
tmp_key_fg.append(name_to_fg_path[name.split('.')[0]])
self.key_alpha.extend(tmp_key_alpha * fg_have_bg_num[i])
self.key_fg.extend(tmp_key_fg * fg_have_bg_num[i])
            if len(self.key_alpha) != 0 and key_sample_ratio > 0:
                # append extra copies of the key samples so that they make up roughly
                # key_sample_ratio of the final list: with K key samples already in a
                # list of N, solve K * (1 + n) / (N + K * n) = r for n
                repeat_num = key_sample_ratio * (len(self.alpha) - len(self.key_alpha)) / len(self.key_alpha) / (1 - key_sample_ratio) - 1
                print('key sample num:', len(self.key_alpha), ', repeat num:', repeat_num)
for i in range(math.ceil(repeat_num)):
self.alpha += self.key_alpha
self.fg += self.key_fg
else:
self.valid_fg_list = self._get_valid_names(self.fg_dir, self.alpha_dir)
self.valid_fg_list.sort()
self.alpha = self._list_abspath(self.alpha_dir, self.alpha_ext, self.valid_fg_list)
self.fg = self._list_abspath(self.fg_dir, self.fg_ext, self.valid_fg_list)
self.valid_bg_list = [os.path.splitext(name)[0] for name in os.listdir(self.bg_dir)]
self.valid_bg_list.sort()
if fg_have_bg_num is not None:
# assert fg_have_bg_num * len(self.valid_fg_list) <= len(self.valid_bg_list)
# self.valid_bg_list = self.valid_bg_list[: fg_have_bg_num * len(self.valid_fg_list)]
assert len(self.alpha) <= len(self.valid_bg_list)
self.valid_bg_list = self.valid_bg_list[: len(self.alpha)]
self.bg = self._list_abspath(self.bg_dir, self.bg_ext, self.valid_bg_list)
def __len__(self):
return len(self.alpha)
class ImageFileTest(ImageFile):
def __init__(self,
alpha_dir="test_alpha",
merged_dir="test_merged",
trimap_dir="test_trimap",
alpha_ext=".png",
merged_ext=".png",
trimap_ext=".png"):
super(ImageFileTest, self).__init__(phase="test")
self.alpha_dir = alpha_dir
self.merged_dir = merged_dir
self.trimap_dir = trimap_dir
self.alpha_ext = alpha_ext
self.merged_ext = merged_ext
self.trimap_ext = trimap_ext
self.valid_image_list = self._get_valid_names(self.alpha_dir, self.merged_dir, self.trimap_dir, shuffle=False)
self.alpha = self._list_abspath(self.alpha_dir, self.alpha_ext, self.valid_image_list)
self.merged = self._list_abspath(self.merged_dir, self.merged_ext, self.valid_image_list)
self.trimap = self._list_abspath(self.trimap_dir, self.trimap_ext, self.valid_image_list)
def __len__(self):
return len(self.alpha)
interp_list = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]
def maybe_random_interp(cv2_interp):
    # return a random interpolation mode when enabled; otherwise the requested one
    if CONFIG.data.random_interp:
        return np.random.choice(interp_list)
    else:
        return cv2_interp
class ToTensor(object):
"""
Convert ndarrays in sample to Tensors with normalization.
"""
def __init__(self, phase="test"):
self.mean = torch.tensor([0.485, 0.456, 0.406]).view(3,1,1)
self.std = torch.tensor([0.229, 0.224, 0.225]).view(3,1,1)
self.phase = phase
def __call__(self, sample):
image, alpha, trimap, mask = sample['image'][:,:,::-1], sample['alpha'], sample['trimap'], sample['mask']
alpha[alpha < 0 ] = 0
alpha[alpha > 1] = 1
image = image.transpose((2, 0, 1)).astype(np.float32)
alpha = np.expand_dims(alpha.astype(np.float32), axis=0)
mask = np.expand_dims(mask.astype(np.float32), axis=0)
image /= 255.
if self.phase == "train":
fg = sample['fg'][:,:,::-1].transpose((2, 0, 1)).astype(np.float32) / 255.
sample['fg'] = torch.from_numpy(fg)
bg = sample['bg'][:,:,::-1].transpose((2, 0, 1)).astype(np.float32) / 255.
sample['bg'] = torch.from_numpy(bg)
        sample['image'], sample['alpha'], sample['trimap'] = \
            torch.from_numpy(image), torch.from_numpy(alpha), torch.from_numpy(trimap).to(torch.long)
if CONFIG.model.trimap_channel == 3:
sample['trimap'] = F.one_hot(sample['trimap'], num_classes=3).permute(2,0,1).float()
elif CONFIG.model.trimap_channel == 1:
sample['trimap'] = sample['trimap'][None,...].float()
else:
raise NotImplementedError("CONFIG.model.trimap_channel can only be 3 or 1")
        # map trimap values 0/128/255 to 0/0.5/1; the order matters, since pixels
        # set to 1 by the second line fall below 85 and escape the third
        sample['trimap'][sample['trimap'] < 85] = 0
        sample['trimap'][sample['trimap'] >= 170] = 1
        sample['trimap'][sample['trimap'] >= 85] = 0.5
sample['mask'] = torch.from_numpy(mask).float()
return sample
class RandomAffine(object):
"""
Random affine translation
"""
def __init__(self, degrees, translate=None, scale=None, shear=None, flip=None, resample=False, fillcolor=0):
if isinstance(degrees, numbers.Number):
if degrees < 0:
raise ValueError("If degrees is a single number, it must be positive.")
self.degrees = (-degrees, degrees)
else:
assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \
"degrees should be a list or tuple and it must be of length 2."
self.degrees = degrees
if translate is not None:
assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
"translate should be a list or tuple and it must be of length 2."
for t in translate:
if not (0.0 <= t <= 1.0):
raise ValueError("translation values should be between 0 and 1")
self.translate = translate
if scale is not None:
assert isinstance(scale, (tuple, list)) and len(scale) == 2, \
"scale should be a list or tuple and it must be of length 2."
for s in scale:
if s <= 0:
raise ValueError("scale values should be positive")
self.scale = scale
if shear is not None:
if isinstance(shear, numbers.Number):
if shear < 0:
raise ValueError("If shear is a single number, it must be positive.")
self.shear = (-shear, shear)
else:
assert isinstance(shear, (tuple, list)) and len(shear) == 2, \
"shear should be a list or tuple and it must be of length 2."
self.shear = shear
else:
self.shear = shear
self.resample = resample
self.fillcolor = fillcolor
self.flip = flip
@staticmethod
def get_params(degrees, translate, scale_ranges, shears, flip, img_size):
"""Get parameters for affine transformation
Returns:
sequence: params to be passed to the affine transformation
"""
angle = random.uniform(degrees[0], degrees[1])
if translate is not None:
max_dx = translate[0] * img_size[0]
max_dy = translate[1] * img_size[1]
translations = (np.round(random.uniform(-max_dx, max_dx)),
np.round(random.uniform(-max_dy, max_dy)))
else:
translations = (0, 0)
if scale_ranges is not None:
scale = (random.uniform(scale_ranges[0], scale_ranges[1]),
random.uniform(scale_ranges[0], scale_ranges[1]))
else:
scale = (1.0, 1.0)
if shears is not None:
shear = random.uniform(shears[0], shears[1])
else:
shear = 0.0
if flip is not None:
flip = (np.random.rand(2) < flip).astype(np.int32) * 2 - 1
return angle, translations, scale, shear, flip
def __call__(self, sample):
fg, alpha = sample['fg'], sample['alpha']
rows, cols, ch = fg.shape
        if max(rows, cols) < 1024:
            # small images: disable rotation (degrees fixed to 0); img_size is
            # (width, height) -- fg is an ndarray, so it has no PIL-style .size
            params = self.get_params((0, 0), self.translate, self.scale, self.shear, self.flip, (cols, rows))
        else:
            params = self.get_params(self.degrees, self.translate, self.scale, self.shear, self.flip, (cols, rows))
center = (cols * 0.5 + 0.5, rows * 0.5 + 0.5)
M = self._get_inverse_affine_matrix(center, *params)
M = np.array(M).reshape((2, 3))
fg = cv2.warpAffine(fg, M, (cols, rows),
flags=maybe_random_interp(cv2.INTER_NEAREST) + cv2.WARP_INVERSE_MAP)
alpha = cv2.warpAffine(alpha, M, (cols, rows),
flags=maybe_random_interp(cv2.INTER_NEAREST) + cv2.WARP_INVERSE_MAP)
sample['fg'], sample['alpha'] = fg, alpha
return sample
    @staticmethod
def _get_inverse_affine_matrix(center, angle, translate, scale, shear, flip):
angle = math.radians(angle)
shear = math.radians(shear)
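        # For reference: with C the translation to the image center, the forward
        # transform is T * C * RSS * C^{-1}, where RSS = R(angle) * Shear(shear) * Scale(sx, sy)
        # (flip is folded into the scale signs); this method returns
        # C * RSS^{-1} * C^{-1} * T^{-1}, which cv2.warpAffine consumes together
        # with the WARP_INVERSE_MAP flag.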
scale_x = 1.0 / scale[0] * flip[0]
scale_y = 1.0 / scale[1] * flip[1]
# Inverted rotation matrix with scale and shear
d = math.cos(angle + shear) * math.cos(angle) + math.sin(angle + shear) * math.sin(angle)
matrix = [
math.cos(angle) * scale_x, math.sin(angle + shear) * scale_x, 0,
-math.sin(angle) * scale_y, math.cos(angle + shear) * scale_y, 0
]
matrix = [m / d for m in matrix]
# Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
matrix[2] += matrix[0] * (-center[0] - translate[0]) + matrix[1] * (-center[1] - translate[1])
matrix[5] += matrix[3] * (-center[0] - translate[0]) + matrix[4] * (-center[1] - translate[1])
# Apply center translation: C * RSS^-1 * C^-1 * T^-1
matrix[2] += center[0]
matrix[5] += center[1]
return matrix
class RandomJitter(object):
"""
Random change the hue of the image
"""
def __call__(self, sample):
sample_ori = sample.copy()
fg, alpha = sample['fg'], sample['alpha']
# if alpha is all 0 skip
if np.all(alpha==0):
return sample_ori
# convert to HSV space, convert to float32 image to keep precision during space conversion.
fg = cv2.cvtColor(fg.astype(np.float32)/255.0, cv2.COLOR_BGR2HSV)
# Hue noise
hue_jitter = np.random.randint(-40, 40)
fg[:, :, 0] = np.remainder(fg[:, :, 0].astype(np.float32) + hue_jitter, 360)
# Saturation noise
sat_bar = fg[:, :, 1][alpha > 0].mean()
if np.isnan(sat_bar):
return sample_ori
sat_jitter = np.random.rand()*(1.1 - sat_bar)/5 - (1.1 - sat_bar) / 10
sat = fg[:, :, 1]
sat = np.abs(sat + sat_jitter)
sat[sat>1] = 2 - sat[sat>1]
fg[:, :, 1] = sat
# Value noise
val_bar = fg[:, :, 2][alpha > 0].mean()
if np.isnan(val_bar):
return sample_ori
val_jitter = np.random.rand()*(1.1 - val_bar)/5-(1.1 - val_bar) / 10
val = fg[:, :, 2]
val = np.abs(val + val_jitter)
val[val>1] = 2 - val[val>1]
fg[:, :, 2] = val
# convert back to BGR space
fg = cv2.cvtColor(fg, cv2.COLOR_HSV2BGR)
sample['fg'] = fg*255
return sample
class RandomHorizontalFlip(object):
"""
Random flip image and label horizontally
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, sample):
fg, alpha = sample['fg'], sample['alpha']
if np.random.uniform(0, 1) < self.prob:
fg = cv2.flip(fg, 1)
alpha = cv2.flip(alpha, 1)
sample['fg'], sample['alpha'] = fg, alpha
return sample
class RandomCrop(object):
"""
Crop randomly the image in a sample, retain the center 1/4 images, and resize to 'output_size'
:param output_size (tuple or int): Desired output size. If int, square crop
is made.
"""
def __init__(self, output_size=( CONFIG.data.crop_size, CONFIG.data.crop_size)):
assert isinstance(output_size, (int, tuple))
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
assert len(output_size) == 2
self.output_size = output_size
self.margin = output_size[0] // 2
self.logger = logging.getLogger("Logger")
def __call__(self, sample):
fg, alpha, trimap, mask, name = sample['fg'], sample['alpha'], sample['trimap'], sample['mask'], sample['image_name']
bg = sample['bg']
h, w = trimap.shape
bg = cv2.resize(bg, (w, h), interpolation=maybe_random_interp(cv2.INTER_CUBIC))
if w < self.output_size[0]+1 or h < self.output_size[1]+1:
ratio = 1.1*self.output_size[0]/h if h < w else 1.1*self.output_size[1]/w
# self.logger.warning("Size of {} is {}.".format(name, (h, w)))
while h < self.output_size[0]+1 or w < self.output_size[1]+1:
fg = cv2.resize(fg, (int(w*ratio), int(h*ratio)), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
alpha = cv2.resize(alpha, (int(w*ratio), int(h*ratio)),
interpolation=maybe_random_interp(cv2.INTER_NEAREST))
trimap = cv2.resize(trimap, (int(w*ratio), int(h*ratio)), interpolation=cv2.INTER_NEAREST)
bg = cv2.resize(bg, (int(w*ratio), int(h*ratio)), interpolation=maybe_random_interp(cv2.INTER_CUBIC))
mask = cv2.resize(mask, (int(w*ratio), int(h*ratio)), interpolation=cv2.INTER_NEAREST)
h, w = trimap.shape
small_trimap = cv2.resize(trimap, (w//4, h//4), interpolation=cv2.INTER_NEAREST)
unknown_list = list(zip(*np.where(small_trimap[self.margin//4:(h-self.margin)//4,
self.margin//4:(w-self.margin)//4] == 128)))
unknown_num = len(unknown_list)
if len(unknown_list) < 10:
left_top = (np.random.randint(0, h-self.output_size[0]+1), np.random.randint(0, w-self.output_size[1]+1))
else:
idx = np.random.randint(unknown_num)
left_top = (unknown_list[idx][0]*4, unknown_list[idx][1]*4)
fg_crop = fg[left_top[0]:left_top[0]+self.output_size[0], left_top[1]:left_top[1]+self.output_size[1],:]
alpha_crop = alpha[left_top[0]:left_top[0]+self.output_size[0], left_top[1]:left_top[1]+self.output_size[1]]
bg_crop = bg[left_top[0]:left_top[0]+self.output_size[0], left_top[1]:left_top[1]+self.output_size[1],:]
trimap_crop = trimap[left_top[0]:left_top[0]+self.output_size[0], left_top[1]:left_top[1]+self.output_size[1]]
mask_crop = mask[left_top[0]:left_top[0]+self.output_size[0], left_top[1]:left_top[1]+self.output_size[1]]
        if len(np.where(trimap == 128)[0]) == 0:
            self.logger.error("{} does not have enough unknown area for crop. Resized to target size. "
                              "left_top: {}".format(name, left_top))
fg_crop = cv2.resize(fg, self.output_size[::-1], interpolation=maybe_random_interp(cv2.INTER_NEAREST))
alpha_crop = cv2.resize(alpha, self.output_size[::-1], interpolation=maybe_random_interp(cv2.INTER_NEAREST))
trimap_crop = cv2.resize(trimap, self.output_size[::-1], interpolation=cv2.INTER_NEAREST)
bg_crop = cv2.resize(bg, self.output_size[::-1], interpolation=maybe_random_interp(cv2.INTER_CUBIC))
mask_crop = cv2.resize(mask, self.output_size[::-1], interpolation=cv2.INTER_NEAREST)
sample.update({'fg': fg_crop, 'alpha': alpha_crop, 'trimap': trimap_crop, 'mask': mask_crop, 'bg': bg_crop})
return sample
class OriginScale(object):
def __call__(self, sample):
h, w = sample["alpha_shape"]
if h % 32 == 0 and w % 32 == 0:
return sample
target_h = 32 * ((h - 1) // 32 + 1)
target_w = 32 * ((w - 1) // 32 + 1)
pad_h = target_h - h
pad_w = target_w - w
padded_image = np.pad(sample['image'], ((0,pad_h), (0, pad_w), (0,0)), mode="reflect")
padded_trimap = np.pad(sample['trimap'], ((0,pad_h), (0, pad_w)), mode="reflect")
padded_mask = np.pad(sample['mask'], ((0,pad_h), (0, pad_w)), mode="reflect")
sample['image'] = padded_image
sample['trimap'] = padded_trimap
sample['mask'] = padded_mask
return sample
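# Example: OriginScale reflect-pads a 600x800 input to 608x800 (608 is the next
# multiple of 32); inputs whose sides are already divisible by 32 pass through.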
class GenMask(object):
def __init__(self):
self.erosion_kernels = [None] + [cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size)) for size in range(1,30)]
def __call__(self, sample):
alpha_ori = sample['alpha']
h, w = alpha_ori.shape
max_kernel_size = 30
alpha = cv2.resize(alpha_ori, (640,640), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
### generate trimap
        # fg_mask is 1 where alpha is (numerically) 1, bg_mask is 1 where it is 0;
        # eroding both leaves a band that becomes the unknown region
        fg_width = np.random.randint(1, max_kernel_size)
        bg_width = np.random.randint(1, max_kernel_size)
        fg_mask = (alpha + 1e-5).astype(np.int32).astype(np.uint8)
        bg_mask = (1 - alpha + 1e-5).astype(np.int32).astype(np.uint8)
        fg_mask = cv2.erode(fg_mask, self.erosion_kernels[fg_width])
        bg_mask = cv2.erode(bg_mask, self.erosion_kernels[bg_width])
trimap = np.ones_like(alpha) * 128
trimap[fg_mask == 1] = 255
trimap[bg_mask == 1] = 0
trimap = cv2.resize(trimap, (w,h), interpolation=cv2.INTER_NEAREST)
sample['trimap'] = trimap
### generate mask
low = 0.01
high = 1.0
thres = random.random() * (high - low) + low
seg_mask = (alpha >= thres).astype(np.int32).astype(np.uint8)
random_num = random.randint(0,3)
if random_num == 0:
seg_mask = cv2.erode(seg_mask, self.erosion_kernels[np.random.randint(1, max_kernel_size)])
elif random_num == 1:
seg_mask = cv2.dilate(seg_mask, self.erosion_kernels[np.random.randint(1, max_kernel_size)])
elif random_num == 2:
seg_mask = cv2.erode(seg_mask, self.erosion_kernels[np.random.randint(1, max_kernel_size)])
seg_mask = cv2.dilate(seg_mask, self.erosion_kernels[np.random.randint(1, max_kernel_size)])
elif random_num == 3:
seg_mask = cv2.dilate(seg_mask, self.erosion_kernels[np.random.randint(1, max_kernel_size)])
seg_mask = cv2.erode(seg_mask, self.erosion_kernels[np.random.randint(1, max_kernel_size)])
seg_mask = cv2.resize(seg_mask, (w,h), interpolation=cv2.INTER_NEAREST)
sample['mask'] = seg_mask
return sample
class Composite(object):
def __call__(self, sample):
fg, bg, alpha = sample['fg'], sample['bg'], sample['alpha']
alpha[alpha < 0 ] = 0
alpha[alpha > 1] = 1
fg[fg < 0 ] = 0
fg[fg > 255] = 255
bg[bg < 0 ] = 0
bg[bg > 255] = 255
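        # standard compositing equation: I = alpha * F + (1 - alpha) * B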
image = fg * alpha[:, :, None] + bg * (1 - alpha[:, :, None])
sample['image'] = image
return sample
class CutMask(object):
def __init__(self, perturb_prob = 0):
self.perturb_prob = perturb_prob
def __call__(self, sample):
        # return unchanged with probability perturb_prob; otherwise paste one
        # random rectangle of the mask over another
        if np.random.rand() < self.perturb_prob:
            return sample
mask = sample['mask'] # H x W, trimap 0--255, segmask 0--1, alpha 0--1
h, w = mask.shape
perturb_size_h, perturb_size_w = random.randint(h // 4, h // 2), random.randint(w // 4, w // 2)
x = random.randint(0, h - perturb_size_h)
y = random.randint(0, w - perturb_size_w)
x1 = random.randint(0, h - perturb_size_h)
y1 = random.randint(0, w - perturb_size_w)
mask[x:x+perturb_size_h, y:y+perturb_size_w] = mask[x1:x1+perturb_size_h, y1:y1+perturb_size_w].copy()
sample['mask'] = mask
return sample
class ScaleFg(object):
def __init__(self, min_scale_fg_scale=0.5, max_scale_fg_scale=1.0):
self.min_scale_fg_scale = min_scale_fg_scale
self.max_scale_fg_scale = max_scale_fg_scale
def __call__(self, sample):
scale_factor = np.random.uniform(low=self.min_scale_fg_scale, high=self.max_scale_fg_scale)
fg, alpha = sample['fg'], sample['alpha'] # np.array(): [H, W, 3] 0 ~ 255 , [H, W] 0.0 ~ 1.0
h, w = alpha.shape
scale_h, scale_w = int(h * scale_factor), int(w * scale_factor)
new_fg, new_alpha = np.zeros_like(fg), np.zeros_like(alpha)
fg = cv2.resize(fg, (scale_w, scale_h), interpolation=cv2.INTER_LINEAR)
alpha = cv2.resize(alpha, (scale_w, scale_h), interpolation=cv2.INTER_LINEAR)
        if scale_factor <= 1:
            # paste the downscaled fg/alpha at a random offset on an empty canvas
            offset_h, offset_w = np.random.randint(h - scale_h + 1), np.random.randint(w - scale_w + 1)
            new_fg[offset_h: offset_h + scale_h, offset_w: offset_w + scale_w, :] = fg
            new_alpha[offset_h: offset_h + scale_h, offset_w: offset_w + scale_w] = alpha
        else:
            # crop an (h, w) window at a random offset out of the upscaled fg/alpha
            offset_h, offset_w = np.random.randint(scale_h - h + 1), np.random.randint(scale_w - w + 1)
            new_fg = fg[offset_h: offset_h + h, offset_w: offset_w + w, :]
            new_alpha = alpha[offset_h: offset_h + h, offset_w: offset_w + w]
sample['fg'], sample['alpha'] = new_fg, new_alpha
return sample
class GenBBox(object):
def __init__(self, bbox_offset_factor = 0.1, random_crop_bbox = None, train_or_test = 'train', dataset_type = None, random_auto_matting=None):
self.bbox_offset_factor = bbox_offset_factor
self.random_crop_bbox = random_crop_bbox
self.train_or_test = train_or_test
self.dataset_type = dataset_type
self.random_auto_matting = random_auto_matting
def __call__(self, sample):
alpha = sample['alpha'] # [1, H, W] 0.0 ~ 1.0
indices = torch.nonzero(alpha[0], as_tuple=True)
if len(indices[0]) > 0:
min_x, min_y = torch.min(indices[1]), torch.min(indices[0])
max_x, max_y = torch.max(indices[1]), torch.max(indices[0])
if self.random_crop_bbox is not None and np.random.uniform(0, 1) < self.random_crop_bbox:
ori_h_w = (sample['alpha'].shape[-2], sample['alpha'].shape[-1])
sample['alpha'] = F.interpolate(sample['alpha'][None, :, min_y: max_y + 1, min_x: max_x + 1], size=ori_h_w, mode='bilinear', align_corners=False)[0]
sample['image'] = F.interpolate(sample['image'][None, :, min_y: max_y + 1, min_x: max_x + 1], size=ori_h_w, mode='bilinear', align_corners=False)[0]
sample['trimap'] = F.interpolate(sample['trimap'][None, :, min_y: max_y + 1, min_x: max_x + 1], size=ori_h_w, mode='nearest')[0]
bbox = torch.tensor([[0, 0, ori_h_w[1] - 1, ori_h_w[0] - 1]])
elif self.bbox_offset_factor != 0:
bbox_w = max(1, max_x - min_x)
bbox_h = max(1, max_y - min_y)
offset_w = math.ceil(self.bbox_offset_factor * bbox_w)
offset_h = math.ceil(self.bbox_offset_factor * bbox_h)
min_x = max(0, min_x + np.random.randint(-offset_w, offset_w))
max_x = min(alpha.shape[2] - 1, max_x + np.random.randint(-offset_w, offset_w))
min_y = max(0, min_y + np.random.randint(-offset_h, offset_h))
max_y = min(alpha.shape[1] - 1, max_y + np.random.randint(-offset_h, offset_h))
bbox = torch.tensor([[min_x, min_y, max_x, max_y]])
else:
bbox = torch.tensor([[min_x, min_y, max_x, max_y]])
if self.random_auto_matting is not None and np.random.uniform(0, 1) < self.random_auto_matting:
bbox = torch.tensor([[0, 0, alpha.shape[2] - 1, alpha.shape[1] - 1]])
else:
bbox = torch.zeros(1, 4)
sample['bbox'] = bbox.float()
return sample
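# GenBBox output format: sample['bbox'] is a float tensor of shape [1, 4] holding
# (x_min, y_min, x_max, y_max) in pixel coordinates, or all zeros when the alpha
# matte is empty.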
class DataGenerator(Dataset):
def __init__(
self,
data,
phase="train",
crop_size=512,
remove_multi_fg=False,
min_scale_fg_scale=None,
max_scale_fg_scale=None,
with_bbox = False,
bbox_offset_factor = None,
return_keys = None,
random_crop_bbox = None,
dataset_name = None,
random_auto_matting = None,
):
self.phase = phase
# self.crop_size = CONFIG.data.crop_size
self.crop_size = crop_size
self.remove_multi_fg = remove_multi_fg
self.with_bbox = with_bbox
self.bbox_offset_factor = bbox_offset_factor
self.alpha = data.alpha
self.return_keys = return_keys
self.random_crop_bbox = random_crop_bbox
self.dataset_name = dataset_name
self.random_auto_matting = random_auto_matting
if self.phase == "train":
self.fg = data.fg
self.bg = data.bg
self.merged = []
self.trimap = []
else:
self.fg = []
self.bg = []
self.merged = data.merged
self.trimap = data.trimap
train_trans = [
RandomAffine(degrees=30, scale=[0.8, 1.25], shear=10, flip=0.5),
GenMask(),
CutMask(perturb_prob=CONFIG.data.cutmask_prob),
RandomCrop((self.crop_size, self.crop_size)),
RandomJitter(),
Composite(),
ToTensor(phase="train")
]
if min_scale_fg_scale is not None:
train_trans.insert(0, ScaleFg(min_scale_fg_scale, max_scale_fg_scale))
if self.with_bbox:
train_trans.append(GenBBox(bbox_offset_factor=self.bbox_offset_factor, random_crop_bbox=self.random_crop_bbox, random_auto_matting=self.random_auto_matting))
test_trans = [ OriginScale(), ToTensor() ]
self.transform = {
'train':
transforms.Compose(train_trans),
'val':
transforms.Compose([
OriginScale(),
ToTensor()
]),
'test':
transforms.Compose(test_trans)
}[phase]
self.fg_num = len(self.fg)
def select_keys(self, sample):
new_sample = {}
for key, val in sample.items():
if key in self.return_keys:
new_sample[key] = val
return new_sample
def __getitem__(self, idx):
if self.phase == "train":
fg = cv2.imread(self.fg[idx % self.fg_num])
alpha = cv2.imread(self.alpha[idx % self.fg_num], 0).astype(np.float32)/255
bg = cv2.imread(self.bg[idx], 1)
if not self.remove_multi_fg:
fg, alpha, multi_fg = self._composite_fg(fg, alpha, idx)
else:
multi_fg = False
image_name = os.path.split(self.fg[idx % self.fg_num])[-1]
sample = {'fg': fg, 'alpha': alpha, 'bg': bg, 'image_name': image_name, 'multi_fg': multi_fg}
else:
image = cv2.imread(self.merged[idx])
alpha = cv2.imread(self.alpha[idx], 0)/255.
trimap = cv2.imread(self.trimap[idx], 0)
mask = (trimap >= 170).astype(np.float32)
image_name = os.path.split(self.merged[idx])[-1]
sample = {'image': image, 'alpha': alpha, 'trimap': trimap, 'mask': mask, 'image_name': image_name, 'alpha_shape': alpha.shape}
sample = self.transform(sample)
if self.return_keys is not None:
sample = self.select_keys(sample)
if self.dataset_name is not None:
sample['dataset_name'] = self.dataset_name
return sample
def _composite_fg(self, fg, alpha, idx):
multi_fg = False
if np.random.rand() < 0.5:
idx2 = np.random.randint(self.fg_num) + idx
fg2 = cv2.imread(self.fg[idx2 % self.fg_num])
alpha2 = cv2.imread(self.alpha[idx2 % self.fg_num], 0).astype(np.float32)/255.
h, w = alpha.shape
fg2 = cv2.resize(fg2, (w, h), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
alpha2 = cv2.resize(alpha2, (w, h), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
alpha_tmp = 1 - (1 - alpha) * (1 - alpha2)
if np.any(alpha_tmp < 1):
fg = fg.astype(np.float32) * alpha[:,:,None] + fg2.astype(np.float32) * (1 - alpha[:,:,None])
# The overlap of two 50% transparency should be 25%
alpha = alpha_tmp
fg = fg.astype(np.uint8)
multi_fg = True
if np.random.rand() < 0.25:
# fg = cv2.resize(fg, (640, 640), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
# alpha = cv2.resize(alpha, (640, 640), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
fg = cv2.resize(fg, (1280, 1280), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
alpha = cv2.resize(alpha, (1280, 1280), interpolation=maybe_random_interp(cv2.INTER_NEAREST))
return fg, alpha, multi_fg
def __len__(self):
if self.phase == "train":
return len(self.bg)
else:
return len(self.alpha)
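# Usage sketch for the training pipeline (hypothetical paths and arguments; the
# directory layout depends on your copy of the dataset):
#
#   train_files = ImageFileTrain(alpha_dir='/data/AIM/alpha', fg_dir='/data/AIM/fg',
#                                bg_dir='/data/coco_bg', alpha_ext='.png',
#                                fg_ext='.png', bg_ext='.jpg')
#   train_set = DataGenerator(train_files, phase='train', crop_size=CONFIG.data.crop_size)
#   train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=4)
#   train_loader = Prefetcher(train_loader)  # optional GPU prefetching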
class ResziePad(object):
    """Resize so the long side equals target_size, then zero-pad to a square."""
def __init__(self, target_size=1024):
self.target_size = target_size
def __call__(self, sample):
_, H, W = sample['image'].shape
scale = self.target_size * 1.0 / max(H, W)
new_H, new_W = H * scale, W * scale
new_W = int(new_W + 0.5)
new_H = int(new_H + 0.5)
choice = {'image', 'trimap', 'alpha'} if 'trimap' in sample.keys() else {'image', 'alpha'}
for key in choice:
if key in {'image', 'trimap'}:
sample[key] = F.interpolate(sample[key][None], size=(new_H, new_W), mode='bilinear', align_corners=False)[0]
else:
# sample[key] = F.interpolate(sample[key][None], size=(new_H, new_W), mode='nearest')[0]
sample[key] = F.interpolate(sample[key][None], size=(new_H, new_W), mode='bilinear', align_corners=False)[0]
padding = torch.zeros([sample[key].shape[0], self.target_size, self.target_size], dtype=sample[key].dtype, device=sample[key].device)
padding[:, : new_H, : new_W] = sample[key]
sample[key] = padding
return sample
class Cv2ResziePad(object):
    """cv2-based variant of ResziePad; also converts arrays to CHW float tensors."""
def __init__(self, target_size=1024):
self.target_size = target_size
def __call__(self, sample):
H, W, _ = sample['image'].shape
scale = self.target_size * 1.0 / max(H, W)
new_H, new_W = H * scale, W * scale
new_W = int(new_W + 0.5)
new_H = int(new_H + 0.5)
choice = {'image', 'trimap', 'alpha'} if 'trimap' in sample.keys() and sample['trimap'] is not None else {'image', 'alpha'}
for key in choice:
sample[key] = cv2.resize(sample[key], (new_W, new_H), interpolation=cv2.INTER_LINEAR) # cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC
if key == 'image':
padding = np.zeros([self.target_size, self.target_size, sample[key].shape[-1]], dtype=sample[key].dtype)
padding[: new_H, : new_W, :] = sample[key]
sample[key] = padding
sample[key] = sample[key][:, :, ::-1].transpose((2, 0, 1)).astype(np.float32) #/ 255.0
else:
padding = np.zeros([self.target_size, self.target_size], dtype=sample[key].dtype)
padding[: new_H, : new_W] = sample[key]
sample[key] = padding
sample[key] = sample[key][None].astype(np.float32)
sample[key] = torch.from_numpy(sample[key])
return sample
class AdobeCompositionTest(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(os.listdir(os.path.join(self.data_dir, 'merged')))
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):  # 1000 images in the Composition-1k test set
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(os.path.join(self.data_dir, 'alpha_copy', self.file_names[idx])).convert('L')
tris = Image.open(os.path.join(self.data_dir, 'trimaps', self.file_names[idx]))
imgs = Image.open(os.path.join(self.data_dir, 'merged', self.file_names[idx]))
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'Adobe'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
sample['trimap'] = torchvision.transforms.functional.to_tensor(tris) * 255.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'Adobe_' + self.file_names[idx]
sample = self.transform(sample)
sample['trimap'][sample['trimap'] < 85] = 0
sample['trimap'][sample['trimap'] >= 170] = 1
sample['trimap'][sample['trimap'] >= 85] = 0.5
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class SIMTest(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(glob.glob(os.path.join(*[data_dir, '*', 'alpha', '*']))) # [: 10]
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(self.file_names[idx]).convert('L')
# tris = Image.open(self.file_names[idx].replace('alpha', 'trimap'))
imgs = Image.open(self.file_names[idx].replace('alpha', 'merged'))
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'SIM'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
# sample['trimap'] = torchvision.transforms.functional.to_tensor(tris) * 255.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'SIM_{}_{}'.format(self.file_names[idx].split('/')[-3], self.file_names[idx].split('/')[-1])
sample = self.transform(sample)
# sample['trimap'][sample['trimap'] < 85] = 0
# sample['trimap'][sample['trimap'] >= 170] = 1
# sample['trimap'][sample['trimap'] >= 85] = 0.5
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class RW100Test(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(glob.glob(os.path.join(*[data_dir, 'mask', '*'])))
self.name_to_idx = dict()
for idx, file_name in enumerate(self.file_names):
self.name_to_idx[file_name.split('/')[-1].split('.')[0]] = idx
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0, train_or_test='test', dataset_type='RW100')
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(self.file_names[idx]).convert('L')
imgs = Image.open(self.file_names[idx].replace('mask', 'image')[:-6] + '.jpg')
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'RW100'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'RW100_' + self.file_names[idx].split('/')[-1]
sample = self.transform(sample)
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class AIM500Test(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(glob.glob(os.path.join(*[data_dir, 'original', '*'])))
self.name_to_idx = dict()
for idx, file_name in enumerate(self.file_names):
self.name_to_idx[file_name.split('/')[-1].split('.')[0]] = idx
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(self.file_names[idx].replace('original', 'mask').replace('jpg', 'png')).convert('L')
# tris = Image.open(self.file_names[idx].replace('original', 'trimap').replace('jpg', 'png')).convert('L')
imgs = Image.open(self.file_names[idx])
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'AIM500'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
# sample['trimap'] = torchvision.transforms.functional.to_tensor(tris) * 255.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'AIM500_' + self.file_names[idx].split('/')[-1]
sample = self.transform(sample)
# sample['trimap'][sample['trimap'] < 85] = 0
# sample['trimap'][sample['trimap'] >= 170] = 1
# sample['trimap'][sample['trimap'] >= 85] = 0.5
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class RWP636Test(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(glob.glob(os.path.join(*[data_dir, 'image', '*'])))
self.name_to_idx = dict()
for idx, file_name in enumerate(self.file_names):
self.name_to_idx[file_name.split('/')[-1].split('.')[0]] = idx
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(self.file_names[idx].replace('image', 'alpha').replace('jpg', 'png')).convert('L')
imgs = Image.open(self.file_names[idx])
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'RWP636'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'RWP636_' + self.file_names[idx].split('/')[-1]
sample = self.transform(sample)
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class AM2KTest(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(glob.glob(os.path.join(*[data_dir, 'validation/original', '*'])))
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(self.file_names[idx].replace('original', 'mask').replace('jpg', 'png')).convert('L')
# tris = Image.open(self.file_names[idx].replace('original', 'trimap').replace('jpg', 'png')).convert('L')
imgs = Image.open(self.file_names[idx])
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'AM2K'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
# sample['trimap'] = torchvision.transforms.functional.to_tensor(tris) * 255.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'AM2K_' + self.file_names[idx].split('/')[-1]
sample = self.transform(sample)
# sample['trimap'][sample['trimap'] < 85] = 0
# sample['trimap'][sample['trimap'] >= 170] = 1
# sample['trimap'][sample['trimap'] >= 85] = 0.5
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class P3M500Test(Dataset):
def __init__(self, data_dir, target_size=1024, multi_fg=None):
self.data_dir = data_dir
self.file_names = sorted(glob.glob(os.path.join(*[data_dir, 'original_image', '*'])))
self.name_to_idx = dict()
for idx, file_name in enumerate(self.file_names):
self.name_to_idx[file_name.split('/')[-1].split('.')[0]] = idx
test_trans = [
ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.file_names)
def __getitem__(self, idx):
phas = Image.open(self.file_names[idx].replace('original_image', 'mask').replace('jpg', 'png')).convert('L')
# tris = Image.open(self.file_names[idx].replace('original_image', 'trimap').replace('jpg', 'png')).convert('L')
imgs = Image.open(self.file_names[idx])
sample = {
'ori_h_w': (imgs.size[1], imgs.size[0]),
'data_type': 'P3M500'
}
sample['alpha'] = torchvision.transforms.functional.to_tensor(phas) # [1, H, W] 0.0 ~ 1.0
# sample['trimap'] = torchvision.transforms.functional.to_tensor(tris) * 255.0
sample['image'] = torchvision.transforms.functional.to_tensor(imgs)
sample['image_name'] = 'P3M500_' + self.file_names[idx].split('/')[-1]
sample = self.transform(sample)
# sample['trimap'][sample['trimap'] < 85] = 0
# sample['trimap'][sample['trimap'] >= 170] = 1
# sample['trimap'][sample['trimap'] >= 85] = 0.5
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
class MattingTest(Dataset):
def __init__(
self,
data_type,
data_dir,
image_sub_path,
alpha_sub_path,
trimpa_sub_path=None,
target_size=1024,
multi_fg=None,
):
self.data_type = data_type
self.data_dir = data_dir
self.image_paths = sorted(glob.glob(os.path.join(*[data_dir, image_sub_path])))
self.alpha_paths = sorted(glob.glob(os.path.join(*[data_dir, alpha_sub_path])))
self.trimpa_paths = sorted(glob.glob(os.path.join(*[data_dir, trimpa_sub_path]))) if trimpa_sub_path is not None else None
self.name_to_idx = dict()
for idx, file_name in enumerate(self.image_paths):
self.name_to_idx[file_name.split('/')[-1].split('.')[0]] = idx
test_trans = [
Cv2ResziePad(target_size=target_size),
GenBBox(bbox_offset_factor=0)
]
self.transform = transforms.Compose(test_trans)
self.multi_fg = multi_fg
    def __len__(self):
return len(self.image_paths)
def __getitem__(self, idx):
img = cv2.imread(self.image_paths[idx])
sample = {
'image': img.astype(np.float32) / 255,
'alpha': cv2.imread(self.alpha_paths[idx], 0).astype(np.float32) / 255,
'trimap': cv2.imread(self.trimpa_paths[idx], 0) if self.trimpa_paths is not None else None,
'ori_h_w': (img.shape[0], img.shape[1]),
'data_type': self.data_type,
'image_name': self.data_type + '_' + self.image_paths[idx].split('/')[-1]
}
sample = self.transform(sample)
if self.trimpa_paths is not None:
sample['trimap'][sample['trimap'] < 85] = 0
sample['trimap'][sample['trimap'] >= 170] = 1
sample['trimap'][sample['trimap'] >= 85] = 0.5
else:
del sample['trimap']
if self.multi_fg is not None:
sample['multi_fg'] = torch.tensor(self.multi_fg)
return sample
def adobe_composition_collate_fn(batch):
new_batch = defaultdict(list)
for sub_batch in batch:
for key in sub_batch.keys():
new_batch[key].append(sub_batch[key])
for key in new_batch:
if isinstance(new_batch[key][0], torch.Tensor):
new_batch[key] = torch.stack(new_batch[key])
return dict(new_batch)
def build_d2_test_dataloader(
dataset,
mapper=None,
total_batch_size=None,
local_batch_size=None,
num_workers=0,
collate_fn=None
):
assert (total_batch_size is None) != (
local_batch_size is None
), "Either total_batch_size or local_batch_size must be specified"
world_size = comm.get_world_size()
if total_batch_size is not None:
assert (
total_batch_size > 0 and total_batch_size % world_size == 0
), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
total_batch_size, world_size
)
batch_size = total_batch_size // world_size
if local_batch_size is not None:
batch_size = local_batch_size
logger = logging.getLogger(__name__)
    if batch_size != 1:
        logger.warning(
            "Test batch size is forced to 1; that is the only mode the d2 test loader supports."
        )
return build_detection_test_loader(
dataset=dataset,
mapper=mapper,
sampler=None,
num_workers=num_workers,
collate_fn=collate_fn,
)
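# Usage sketch (hypothetical path): a one-image-per-batch loader for the
# Composition-1k test set.
#
#   test_set = AdobeCompositionTest(data_dir='/data/Composition-1k-testset', target_size=1024)
#   test_loader = build_d2_test_dataloader(test_set, local_batch_size=1, num_workers=2,
#                                          collate_fn=adobe_composition_collate_fn)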
class AdobeCompositionEvaluator(DatasetEvaluator):
def __init__(
self,
save_eval_results_step=-1,
output_dir=None,
eval_dataset_type=['Adobe'],
distributed=True,
eval_w_sam_hq_mask = False,
):
self.save_eval_results_step = save_eval_results_step
self.output_dir = output_dir
self.eval_index = 0
self.eval_dataset_type = eval_dataset_type
self.eval_w_sam_hq_mask = eval_w_sam_hq_mask
self._distributed = distributed
self._logger = logging.getLogger(__name__)
def reset(self):
self.eval_metric = dict()
for i in self.eval_dataset_type:
self.eval_metric[i + '_MSE'] = []
self.eval_metric[i + '_SAD'] = []
self.eval_metric[i + '_MAD'] = []
self.eval_metric[i + '_Grad'] = []
self.eval_metric[i + '_Conn'] = []
        if self.output_dir is not None:
            os.makedirs(self.output_dir, exist_ok=True)
def process(self, inputs, outputs):
"""
Args:
inputs: {'alpha', 'trimap', 'image', 'bbox', 'image_name'}
outputs: [1, 1, H, W] 0. ~ 1.
"""
# crop the black pad area
assert inputs['image'].shape[-1] == inputs['image'].shape[-2] == 1024 and len(inputs['ori_h_w']) == 1
inputs['ori_h_w'] = inputs['ori_h_w'][0]
before_pad_h, before_pad_w = int(1024 / max(inputs['ori_h_w']) * inputs['ori_h_w'][0] + 0.5), int(1024 / max(inputs['ori_h_w']) * inputs['ori_h_w'][1] + 0.5)
inputs['image'] = inputs['image'][:, :, :before_pad_h, :before_pad_w]
inputs['alpha'] = inputs['alpha'][:, :, :before_pad_h, :before_pad_w]
if self.eval_w_sam_hq_mask:
outputs, samhq_low_res_masks = outputs[0][:, :, :before_pad_h, :before_pad_w], outputs[1][:, :, :before_pad_h, :before_pad_w]
pred_alpha, label_alpha, samhq_low_res_masks = outputs.cpu().numpy(), inputs['alpha'].numpy(), (samhq_low_res_masks > 0).float().cpu()
else:
outputs = outputs[:, :, :before_pad_h, :before_pad_w]
pred_alpha, label_alpha = outputs.cpu().numpy(), inputs['alpha'].numpy()
# if 'trimap' in inputs.keys():
# inputs['trimap'] = inputs['trimap'][:, :, :before_pad_h, :before_pad_w]
# trimap = inputs['trimap'].numpy()
# assert np.max(trimap) <= 1 and len(np.unique(trimap)) <= 3
# sad_loss_unknown = compute_sad_loss(pred_alpha, label_alpha, trimap, area='unknown')
# mse_loss_unknown = compute_mse_loss(pred_alpha, label_alpha, trimap, area='unknown')
# self.eval_metric[inputs['data_type'][0] + '_unknown_mse (1e-3)'].append(mse_loss_unknown)
# self.eval_metric[inputs['data_type'][0] + '_unknown_sad (1e3)'].append(sad_loss_unknown)
        # calculate losses over the whole image: a detailmap of all 128s marks
        # every pixel as "unknown" for the metric functions
        assert np.max(pred_alpha) <= 1 and np.max(label_alpha) <= 1
        eval_pred = np.uint8(pred_alpha[0, 0] * 255.0 + 0.5) * 1.0  # quantize to 8-bit levels
        eval_gt = label_alpha[0, 0] * 255.0
        detailmap = np.zeros_like(eval_gt) + 128
mse_loss_ = compute_mse_loss(eval_pred, eval_gt, detailmap)
sad_loss_ = compute_sad_loss(eval_pred, eval_gt, detailmap)[0]
mad_loss_ = compute_mad_loss(eval_pred, eval_gt, detailmap)
grad_loss_ = compute_gradient_loss(eval_pred, eval_gt, detailmap)
conn_loss_ = compute_connectivity_error(eval_pred, eval_gt, detailmap)
self.eval_metric[inputs['data_type'][0] + '_MSE'].append(mse_loss_)
self.eval_metric[inputs['data_type'][0] + '_SAD'].append(sad_loss_)
self.eval_metric[inputs['data_type'][0] + '_MAD'].append(mad_loss_)
self.eval_metric[inputs['data_type'][0] + '_Grad'].append(grad_loss_)
self.eval_metric[inputs['data_type'][0] + '_Conn'].append(conn_loss_)
# vis results
if self.save_eval_results_step != -1 and self.eval_index % self.save_eval_results_step == 0:
if self.eval_w_sam_hq_mask:
self.save_vis_results(inputs, pred_alpha, samhq_low_res_masks)
else:
self.save_vis_results(inputs, pred_alpha)
self.eval_index += 1
def save_vis_results(self, inputs, pred_alpha, samhq_low_res_masks=None):
# image
image = inputs['image'][0].permute(1, 2, 0) * 255.0
l, u, r, d = int(inputs['bbox'][0, 0, 0].item()), int(inputs['bbox'][0, 0, 1].item()), int(inputs['bbox'][0, 0, 2].item()), int(inputs['bbox'][0, 0, 3].item())
red_line = torch.tensor([[255., 0., 0.]], device=image.device, dtype=image.dtype)
image[u: d, l, :] = red_line
image[u: d, r, :] = red_line
image[u, l: r, :] = red_line
image[d, l: r, :] = red_line
image = np.uint8(image.numpy())
# trimap, pred_alpha, label_alpha
save_results = [image]
choice = [inputs['trimap'], torch.from_numpy(pred_alpha), inputs['alpha']] if 'trimap' in inputs.keys() else [torch.from_numpy(pred_alpha), inputs['alpha']]
for val in choice:
            val = val[0].permute(1, 2, 0).repeat(1, 1, 3) * 255.0 + 0.5  # +0.5 before the uint8 cast rounds to nearest
val = np.uint8(val.numpy())
save_results.append(val)
if samhq_low_res_masks is not None:
save_results.append(np.uint8(samhq_low_res_masks[0].permute(1, 2, 0).repeat(1, 1, 3).numpy() * 255.0))
save_results = np.concatenate(save_results, axis=1)
save_name = os.path.join(self.output_dir, inputs['image_name'][0])
Image.fromarray(save_results).save(save_name.replace('.jpg', '.png'))
def evaluate(self):
if self._distributed:
comm.synchronize()
eval_metric = comm.gather(self.eval_metric, dst=0)
if not comm.is_main_process():
return {}
            merged_eval_metric = defaultdict(list)
            for sub_eval_metric in eval_metric:
                for key, val in sub_eval_metric.items():
                    merged_eval_metric[key] += val
            eval_metric = merged_eval_metric
else:
eval_metric = self.eval_metric
eval_results = {}
for key, val in eval_metric.items():
if len(val) != 0:
# if 'mse' in key:
# eval_results[key] = np.array(val).mean() * 1e3
# else:
# assert 'sad' in key
# eval_results[key] = np.array(val).mean() / 1e3
eval_results[key] = np.array(val).mean()
return eval_results
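# Usage sketch (hypothetical `model` and `test_loader`): accumulate metrics over
# a test set and report their means.
#
#   evaluator = AdobeCompositionEvaluator(output_dir='./eval_vis', distributed=False)
#   evaluator.reset()
#   for inputs in test_loader:
#       pred_alpha = model(inputs)             # [1, 1, H, W], values in [0, 1]
#       evaluator.process(inputs, pred_alpha)
#   print(evaluator.evaluate())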
if __name__ == '__main__':
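    # Minimal smoke test: run the synthetic training transforms on random arrays
    # (no dataset on disk required). Shapes and values are illustrative only.
    dummy = {
        'fg': np.random.randint(0, 255, (600, 800, 3)).astype(np.float32),
        'alpha': np.random.rand(600, 800).astype(np.float32),
        'bg': np.random.randint(0, 255, (600, 800, 3)).astype(np.float32),
        'image_name': 'dummy.png',
    }
    pipeline = transforms.Compose([
        RandomAffine(degrees=30, scale=[0.8, 1.25], shear=10, flip=0.5),
        GenMask(),
        CutMask(perturb_prob=CONFIG.data.cutmask_prob),
        RandomCrop((CONFIG.data.crop_size, CONFIG.data.crop_size)),
        RandomJitter(),
        Composite(),
        ToTensor(phase='train'),
    ])
    out = pipeline(dummy)
    print({k: tuple(v.shape) for k, v in out.items() if isinstance(v, torch.Tensor)})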