Dimension problem

Dear community and @John6666 ,

I finally updated my dataset masks so the end products are 512 x 512 x 3. I was going to follow the code as suggested, but I ran into the same error I had before I recreated my masks. I asked GPT, which said I might need to adjust the size of the images, but the dataset from the example code (check their dataset here) and mine (check my dataset here) are both 3-dimensional; theirs is (1080, 1920, 3) while mine is (512, 512, 3).

Here's the error; the code is essentially identical to this tutorial. Is my mask still wrong (maybe it isn't a proper segmentation map? I don't really know what a segmentation map is supposed to be: should it not be 3D, or not a .jpeg image?), or what parameters should I change?
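For reference, this is roughly how I checked the shapes (a quick sketch; column names assumed from the tutorial):

import numpy as np
from datasets import load_dataset

ds = load_dataset("seand0101/segformer-b0-finetuned-ade-512-512-manggarai-watergate")

image = ds["train"][0]["pixel_values"]   # PIL image
mask = ds["train"][0]["label"]           # PIL mask

print(np.array(image).shape)  # (512, 512, 3)
print(np.array(mask).shape)   # also (512, 512, 3) in my case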

The error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[59], line 1
----> 1 trainer.train()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:2155, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   2152 try:
   2153     # Disable progress bars when uploading models during checkpoints to avoid polluting stdout
   2154     hf_hub_utils.disable_progress_bars()
-> 2155     return inner_training_loop(
   2156         args=args,
   2157         resume_from_checkpoint=resume_from_checkpoint,
   2158         trial=trial,
   2159         ignore_keys_for_eval=ignore_keys_for_eval,
   2160     )
   2161 finally:
   2162     hf_hub_utils.enable_progress_bars()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:2472, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2470 update_step += 1
   2471 num_batches = args.gradient_accumulation_steps if update_step != (total_updates - 1) else remainder
-> 2472 batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
   2473 for i, inputs in enumerate(batch_samples):
   2474     step += 1

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:5131, in Trainer.get_batch_samples(self, epoch_iterator, num_batches)
   5129 for _ in range(num_batches):
   5130     try:
-> 5131         batch_samples += [next(epoch_iterator)]
   5132     except StopIteration:
   5133         break

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\accelerate\data_loader.py:552, in DataLoaderShard.__iter__(self)
    550 # We iterate one batch ahead to check when we are at the end
    551 try:
--> 552     current_batch = next(dataloader_iter)
    553 except StopIteration:
    554     yield

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\utils\data\dataloader.py:701, in _BaseDataLoaderIter.__next__(self)
    698 if self._sampler_iter is None:
    699     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    700     self._reset()  # type: ignore[call-arg]
--> 701 data = self._next_data()
    702 self._num_yielded += 1
    703 if (
    704     self._dataset_kind == _DatasetKind.Iterable
    705     and self._IterableDataset_len_called is not None
    706     and self._num_yielded > self._IterableDataset_len_called
    707 ):

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\utils\data\dataloader.py:757, in _SingleProcessDataLoaderIter._next_data(self)
    755 def _next_data(self):
    756     index = self._next_index()  # may raise StopIteration
--> 757     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    758     if self._pin_memory:
    759         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\utils\data\_utils\fetch.py:50, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     48 if self.auto_collation:
     49     if hasattr(self.dataset, "__getitems__") and self.dataset.__getitems__:
---> 50         data = self.dataset.__getitems__(possibly_batched_index)
     51     else:
     52         data = [self.dataset[idx] for idx in possibly_batched_index]

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\datasets\arrow_dataset.py:2766, in Dataset.__getitems__(self, keys)
   2764 def __getitems__(self, keys: List) -> List:
   2765     """Can be used to get a batch using a list of integers indices."""
-> 2766     batch = self.__getitem__(keys)
   2767     n_examples = len(batch[next(iter(batch))])
   2768     return [{col: array[i] for col, array in batch.items()} for i in range(n_examples)]

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\datasets\arrow_dataset.py:2762, in Dataset.__getitem__(self, key)
   2760 def __getitem__(self, key):  # noqa: F811
   2761     """Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools)."""
-> 2762     return self._getitem(key)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\datasets\arrow_dataset.py:2747, in Dataset._getitem(self, key, **kwargs)
   2745 formatter = get_formatter(format_type, features=self._info.features, **format_kwargs)
   2746 pa_subtable = query_table(self._data, key, indices=self._indices)
-> 2747 formatted_output = format_table(
   2748     pa_subtable, key, formatter=formatter, format_columns=format_columns, output_all_columns=output_all_columns
   2749 )
   2750 return formatted_output

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\datasets\formatting\formatting.py:639, in format_table(table, key, formatter, format_columns, output_all_columns)
    637 python_formatter = PythonFormatter(features=formatter.features)
    638 if format_columns is None:
--> 639     return formatter(pa_table, query_type=query_type)
    640 elif query_type == "column":
    641     if key in format_columns:

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\datasets\formatting\formatting.py:407, in Formatter.__call__(self, pa_table, query_type)
    405     return self.format_column(pa_table)
    406 elif query_type == "batch":
--> 407     return self.format_batch(pa_table)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\datasets\formatting\formatting.py:522, in CustomFormatter.format_batch(self, pa_table)
    520 batch = self.python_arrow_extractor().extract_batch(pa_table)
    521 batch = self.python_features_decoder.decode_batch(batch)
--> 522 return self.transform(batch)

Cell In[47], line 10, in train_transforms(example_batch)
      8 images = [jitter(x) for x in example_batch['pixel_values']]
      9 labels = [x for x in example_batch['label']]
---> 10 inputs = feature_extractor(images, labels)
     11 return inputs

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\models\segformer\image_processing_segformer.py:303, in SegformerImageProcessor.__call__(self, images, segmentation_maps, **kwargs)
    296 def __call__(self, images, segmentation_maps=None, **kwargs):
    297     """
    298     Preprocesses a batch of images and optionally segmentation maps.
    299 
    300     Overrides the `__call__` method of the `Preprocessor` class so that both images and segmentation maps can be
    301     passed in as positional arguments.
    302     """
--> 303     return super().__call__(images, segmentation_maps=segmentation_maps, **kwargs)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\image_processing_utils.py:41, in BaseImageProcessor.__call__(self, images, **kwargs)
     39 def __call__(self, images, **kwargs) -> BatchFeature:
     40     """Preprocess an image or a batch of images."""
---> 41     return self.preprocess(images, **kwargs)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\utils\deprecation.py:165, in deprecate_kwarg.<locals>.wrapper.<locals>.wrapped_func(*args, **kwargs)
    161 elif minimum_action in (Action.NOTIFY, Action.NOTIFY_ALWAYS):
    162     # DeprecationWarning is ignored by default, so we use FutureWarning instead
    163     warnings.warn(message, FutureWarning, stacklevel=2)
--> 165 return func(*args, **kwargs)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\utils\generic.py:852, in filter_out_non_signature_kwargs.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    843         cls_prefix = ""
    845     warnings.warn(
    846         f"The following named arguments are not valid for `{cls_prefix}{func.__name__}`"
    847         f" and were ignored: {invalid_kwargs_names}",
    848         UserWarning,
    849         stacklevel=2,
    850     )
--> 852 return func(*args, **valid_kwargs)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\models\segformer\image_processing_segformer.py:423, in SegformerImageProcessor.preprocess(self, images, segmentation_maps, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_reduce_labels, return_tensors, data_format, input_data_format)
    420 data = {"pixel_values": images}
    422 if segmentation_maps is not None:
--> 423     segmentation_maps = [
    424         self._preprocess_mask(
    425             segmentation_map=segmentation_map,
    426             do_reduce_labels=do_reduce_labels,
    427             do_resize=do_resize,
    428             size=size,
    429             input_data_format=input_data_format,
    430         )
    431         for segmentation_map in segmentation_maps
    432     ]
    433     data["labels"] = segmentation_maps
    435 return BatchFeature(data=data, tensor_type=return_tensors)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\models\segformer\image_processing_segformer.py:424, in <listcomp>(.0)
    420 data = {"pixel_values": images}
    422 if segmentation_maps is not None:
    423     segmentation_maps = [
--> 424         self._preprocess_mask(
    425             segmentation_map=segmentation_map,
    426             do_reduce_labels=do_reduce_labels,
    427             do_resize=do_resize,
    428             size=size,
    429             input_data_format=input_data_format,
    430         )
    431         for segmentation_map in segmentation_maps
    432     ]
    433     data["labels"] = segmentation_maps
    435 return BatchFeature(data=data, tensor_type=return_tensors)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\models\segformer\image_processing_segformer.py:278, in SegformerImageProcessor._preprocess_mask(self, segmentation_map, do_reduce_labels, do_resize, size, input_data_format)
    276     added_channel_dim = False
    277     if input_data_format is None:
--> 278         input_data_format = infer_channel_dimension_format(segmentation_map, num_channels=1)
    279 # reduce zero label if needed
    280 segmentation_map = self._preprocess(
    281     image=segmentation_map,
    282     do_reduce_labels=do_reduce_labels,
   (...)
    288     input_data_format=input_data_format,
    289 )

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\image_utils.py:255, in infer_channel_dimension_format(image, num_channels)
    253 elif image.shape[last_dim] in num_channels:
    254     return ChannelDimension.LAST
--> 255 raise ValueError("Unable to infer channel dimension format")

ValueError: Unable to infer channel dimension format

I have checked this ([Error] Unable to infer channel dimension format), but from those sizes I think both the image and the label already have the same size and dimensions.
Thanks in advance


Neither the original dataset nor your dataset can be examined because they are gated…
Is there an example that is not gated, even if it is not the actual one?
Also, the samples are sometimes out of date and incorrect.

How do I make it ungated? Wait, let me check.

It is open already; was there another button I have not pressed?

Disable access requests.


Now it’s disabled if I read the UI correctly.

Oh never mind, I'm a giddygoat. Now you can check it, I guess?

1 Like

I can check it now!

Great, I hope you know what I should do, because all the guides lead me to convert the images to the same dimensions (convert to RGB or make everything consistently jpeg), but my images are already like that. By the way, I made the masks using this, if you'd like to know how I got the segmentation maps. I did it with the pipeline from transformers.
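Roughly like this, from memory (a sketch; the checkpoint and file names here are placeholders, not necessarily what I actually used):

from transformers import pipeline
from PIL import Image

# placeholder checkpoint, just to illustrate the idea
segmenter = pipeline("image-segmentation", model="nvidia/segformer-b0-finetuned-ade-512-512")

image = Image.open("frame_0001.jpg")      # hypothetical input frame
results = segmenter(image)                # list of {"score", "label", "mask"} dicts, mask is a PIL image
mask = results[0]["mask"]                 # mask for the first detected class
mask.save("mask_0001.png")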

Is there documentation I should read about this? Like… is it from here?

import torch
from torch import nn
import evaluate

metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
  with torch.no_grad():
    logits, labels = eval_pred
    logits_tensor = torch.from_numpy(logits)
    # scale the logits to the size of the label
    logits_tensor = nn.functional.interpolate(
        logits_tensor,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)

    pred_labels = logits_tensor.detach().cpu().numpy()
    # currently using _compute instead of compute
    # see this issue for more info: https://github.com/huggingface/evaluate/pull/328#issuecomment-1286866576
    metrics = metric._compute(
            predictions=pred_labels,
            references=labels,
            num_labels=len(id2label),
            ignore_index=0,
            reduce_labels=feature_extractor.reduce_labels,
        )
    
    # add per category metrics as individual key-value pairs
    per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
    per_category_iou = metrics.pop("per_category_iou").tolist()

    metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
    metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})
    
    return metrics

From what I checked, bilinear is also correct for segmentation.


→ 278 input_data_format = infer_channel_dimension_format(segmentation_map, num_channels=1)

Going back to error 1, in this case it seems the mask image should be a single black-and-white channel rather than three RGB channels…
For this purpose, image.convert("L") is fine.
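Something like this (a minimal sketch, assuming the mask is a PIL image; the path is hypothetical):

from PIL import Image

mask = Image.open("mask_0001.png")   # hypothetical path
print(mask.mode)                     # "RGB" -> three channels, which trips up the mask preprocessor

mask = mask.convert("L")             # single grayscale channel
print(mask.mode)                     # "L"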


So I should loop over all the mask images, apply image.convert("L"), and create the dataset from scratch again? Sure, let me give it a try.


Wait! I think you can pre-process it just before training, without having to remake the dataset.

Don't you think it's harder for the computer to have the transform take each image (or each batch of images) and run image.convert('L') on it before using it in training?

I mean in terms of computing power.

No, it's easy. Computers are really fast now.
However, I think it would be better to convert the whole list yourself before passing it to the transformers library. The computation will be done in an instant.
If the image is PIL, it's easy; if the image is numpy, you can just have an AI write the code to convert it to PIL, reduce it to a single channel, and convert it back to numpy.
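For example (a rough sketch, assuming the mask is a numpy RGB array):

import numpy as np
from PIL import Image

mask_np = np.zeros((512, 512, 3), dtype=np.uint8)   # placeholder RGB mask

mask_l = Image.fromarray(mask_np).convert("L")      # numpy -> PIL -> single channel
mask_np_single = np.array(mask_l)                   # back to numpy, shape (512, 512)
print(mask_np_single.shape)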

I think the images are PIL; the problem is I don't know where to put the conversion. Is it after

from datasets import load_dataset

hf_dataset_identifier = "seand0101/segformer-b0-finetuned-ade-512-512-manggarai-watergate"
ds = load_dataset(hf_dataset_identifier)

and then loop over ds["train"]["pixel_values"], putting them into a list after converting them?

if ds["train"]["pixel_values"][0] is a PIL image, ds["train"]["pixel_values"][0] = ds["train"]["pixel_values"][0].convert("L")

I think there is better code, but this still works.
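For instance, something along these lines with Dataset.map might be cleaner (a sketch; the column name "label" is taken from your transform code):

# convert the mask column to single-channel once, up front
def to_grayscale(example):
    example["label"] = example["label"].convert("L")
    return example

train_ds = train_ds.map(to_grayscale)
test_ds = test_ds.map(to_grayscale)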

Got it, will try it first. Wait, thanks again John.

Wait, I thought it was only the mask that gets converted to a single channel. The picture too? Or was that just your example?


No, I think the only thing causing the error is the mask. If there is another error, we can think about it then. Normally, images are assumed to be RGB; only the mask is the exception.
And apart from the number of channels, if you pass the options to the transformers preprocessor, it will do all the tedious processing for you: resize, crop, padding, …
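For example (a sketch; these are the kinds of options SegformerImageProcessor exposes, and the values here are only illustrative):

from transformers import SegformerImageProcessor

processor = SegformerImageProcessor(
    do_resize=True,
    size={"height": 512, "width": 512},
    do_rescale=True,
    do_normalize=True,
    do_reduce_labels=False,   # set True if label 0 is a background class you want ignored
)

# images / labels as in your train_transforms
inputs = processor(images, segmentation_maps=labels, return_tensors="pt")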

Great, I will try it on the mask first. Wait.

Wait, should I append these to a list, or if I convert them once will the new versions be used as-is?

from torchvision.transforms import ColorJitter
from transformers import SegformerFeatureExtractor

feature_extractor = SegformerFeatureExtractor()
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)

def train_transforms(example_batch):
    images = [jitter(x) for x in example_batch['pixel_values']]
    labels = [x for x in example_batch['label']]
    inputs = feature_extractor(images, labels)
    return inputs

def val_transforms(example_batch):
    images=[x for x in example_batch['pixel_values']]
    labels = [x for x in example_batch['label']]
    inputs = feature_extractor(images, labels)

    return inputs

# Set transforms
train_ds.set_transform(train_transforms)
test_ds.set_transform(val_transforms)

I can add the conversion to these functions, I guess, since they serve as the preprocessing.


should I append these to a list, or if I convert them once will the new versions be used as-is?

As long as it’s not a huge amount of data, there’s no problem using a newly created list, and it’s actually safer.

Changed that code to this

from torchvision.transforms import ColorJitter
from transformers import SegformerFeatureExtractor

feature_extractor = SegformerFeatureExtractor()
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)

def train_transforms(example_batch):
    images = [jitter(x) for x in example_batch['pixel_values']]
    labels = [x.convert("L") for x in example_batch['label']]
    inputs = feature_extractor(images, labels)
    return inputs

def val_transforms(example_batch):
    images=[x for x in example_batch['pixel_values']]
    labels = [x.convert("L") for x in example_batch['label']]
    inputs = feature_extractor(images, labels)

    return inputs

# Set transforms
train_ds.set_transform(train_transforms)
test_ds.set_transform(val_transforms)

if that makes any sense.

Now the error changes to "Target 11 is out of bounds".

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[125], line 1
----> 1 trainer.train()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:2155, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   2152 try:
   2153     # Disable progress bars when uploading models during checkpoints to avoid polluting stdout
   2154     hf_hub_utils.disable_progress_bars()
-> 2155     return inner_training_loop(
   2156         args=args,
   2157         resume_from_checkpoint=resume_from_checkpoint,
   2158         trial=trial,
   2159         ignore_keys_for_eval=ignore_keys_for_eval,
   2160     )
   2161 finally:
   2162     hf_hub_utils.enable_progress_bars()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:2522, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2516 context = (
   2517     functools.partial(self.accelerator.no_sync, model=model)
   2518     if i != len(batch_samples) - 1
   2519     else contextlib.nullcontext
   2520 )
   2521 with context():
-> 2522     tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
   2524 if (
   2525     args.logging_nan_inf_filter
   2526     and not is_torch_xla_available()
   2527     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   2528 ):
   2529     # if loss is nan or inf simply add the average of previous logged losses
   2530     tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:3655, in Trainer.training_step(self, model, inputs, num_items_in_batch)
   3653         loss = self.compute_loss(model, inputs)
   3654     else:
-> 3655         loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
   3657 del inputs
   3658 if (
   3659     self.args.torch_empty_cache_steps is not None
   3660     and self.state.global_step % self.args.torch_empty_cache_steps == 0
   3661 ):

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\trainer.py:3709, in Trainer.compute_loss(self, model, inputs, return_outputs, num_items_in_batch)
   3707         loss_kwargs["num_items_in_batch"] = num_items_in_batch
   3708     inputs = {**inputs, **loss_kwargs}
-> 3709 outputs = model(**inputs)
   3710 # Save past state if it exists
   3711 # TODO: this needs to be fixed and made cleaner later.
   3712 if self.args.past_index >= 0:

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\nn\modules\module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\nn\modules\module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\transformers\models\segformer\modeling_segformer.py:809, in SegformerForSemanticSegmentation.forward(self, pixel_values, labels, output_attentions, output_hidden_states, return_dict)
    807 if self.config.num_labels > 1:
    808     loss_fct = CrossEntropyLoss(ignore_index=self.config.semantic_loss_ignore_index)
--> 809     loss = loss_fct(upsampled_logits, labels)
    810 elif self.config.num_labels == 1:
    811     valid_mask = ((labels >= 0) & (labels != self.config.semantic_loss_ignore_index)).float()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\nn\modules\module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
   1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\nn\modules\module.py:1747, in Module._call_impl(self, *args, **kwargs)
   1742 # If we don't have any hooks, we want to skip the rest of the logic in
   1743 # this function, and just call forward.
   1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1745         or _global_backward_pre_hooks or _global_backward_hooks
   1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
   1749 result = None
   1750 called_always_called_hooks = set()

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\nn\modules\loss.py:1293, in CrossEntropyLoss.forward(self, input, target)
   1292 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1293     return F.cross_entropy(
   1294         input,
   1295         target,
   1296         weight=self.weight,
   1297         ignore_index=self.ignore_index,
   1298         reduction=self.reduction,
   1299         label_smoothing=self.label_smoothing,
   1300     )

File c:\Users\Lenovo\miniconda3\envs\pretrain-huggingface\Lib\site-packages\torch\nn\functional.py:3479, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
   3477 if size_average is not None or reduce is not None:
   3478     reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3479 return torch._C._nn.cross_entropy_loss(
   3480     input,
   3481     target,
   3482     weight,
   3483     _Reduction.get_enum(reduction),
   3484     ignore_index,
   3485     label_smoothing,
   3486 )

IndexError: Target 11 is out of bounds.

This?