Solved it by adding these label-clamping limiters to the transform code:
import numpy as np
from torchvision.transforms import ColorJitter
from transformers import SegformerFeatureExtractor

jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)

def train_transforms(example_batch):
    # previous version, without clamping:
    # images = [jitter(x) for x in example_batch['pixel_values']]
    # labels = [x.convert("L") for x in example_batch['label']]
    # inputs = feature_extractor(images, labels)
    # return inputs
    images = [jitter(x) for x in example_batch['pixel_values']]
    labels = [x.convert("L") for x in example_batch['label']]  # convert label images to L mode (greyscale)
    labels = [np.array(label) for label in labels]  # convert labels to numpy arrays for clamping
    labels = [np.clip(label, 0, num_labels - 1) for label in labels]  # clamp labels to the valid range
    inputs = feature_extractor(images, labels, return_tensors="pt")
    return inputs
def val_transforms(example_batch):
    # previous version, without clamping:
    # images = [x for x in example_batch['pixel_values']]
    # labels = [x.convert("L") for x in example_batch['label']]
    # inputs = feature_extractor(images, labels)
    # return inputs
    images = [x for x in example_batch['pixel_values']]
    labels = [x.convert("L") for x in example_batch['label']]
    labels = [np.array(label) for label in labels]
    labels = [np.clip(label, 0, num_labels - 1) for label in labels]
    inputs = feature_extractor(images, labels, return_tensors="pt")
    return inputs
# Set transforms
train_ds.set_transform(train_transforms)
test_ds.set_transform(val_transforms)
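As a quick sanity check (a minimal sketch; it assumes num_labels and train_ds are defined as above), you can pull a small batch through the transform and look at the label range:

import numpy as np

batch = train_ds[:2]                  # set_transform runs train_transforms on this slice
labels = np.asarray(batch["labels"])  # segmentation maps returned by the feature extractor
print(labels.min(), labels.max())     # class ids should stay below num_labels
                                      # (255 can also appear if the extractor's reduce_labels
                                      #  option maps the background to the ignore index)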
And correcting this: I didn’t realize I was redefining it again with an empty SegformerFeatureExtractor(), so only the pretrained one is kept:
from transformers import SegformerFeatureExtractor
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
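For context, here is a small sketch (not my original notebook code) of why the empty () mattered: an extractor built with no arguments falls back to the class defaults instead of the checkpoint's preprocessing config, so the accidental redefinition could silently change the preprocessing:

from transformers import SegformerFeatureExtractor

default_fe = SegformerFeatureExtractor()  # empty (): class defaults
pretrained_fe = SegformerFeatureExtractor.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512"
)
print(default_fe.to_dict())      # compare the two configs; settings such as
print(pretrained_fe.to_dict())   # reduce_labels or size may not match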
I will update on how it works after reading the docs, but for now I still need to deal with the training memory usage… what is a good number of epochs for a model, and how do you decide that?