ValueError: negative dimensions are not allowed
hey
@ylacombe
, I've been trying to fine-tune w2v-BERT 2.0 on some of my own custom training data. However, when I run the prepare_dataset
function I get the following error: ValueError: negative dimensions are not allowed
It originates from:
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/audio_utils.py", line 532, in spectrogram
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
I have tried fine-tuning other models such as MMS and Whisper, but the issue only occurs with w2v-BERT 2.0.
For some additional context, I am using the latest releases of transformers, datasets, torchaudio and torch.
The following is the stack trace of the crash.
preprocess datasets (num_proc=32): 9%|██▋ | 6791/71915 [00:27<04:24, 245.95 examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/user/anaconda3/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 634, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3517, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3416, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 669, in prepare_dataset
batch["input_features"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_features[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py", line 99, in __call__
inputs = self.feature_extractor(audio, sampling_rate=sampling_rate, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in __call__
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in <listcomp>
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 128, in _extract_fbank_features
features = spectrogram(
^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/audio_utils.py", line 532, in spectrogram
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: negative dimensions are not allowed
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 807, in <module>
main()
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 676, in main
vectorized_datasets = raw_datasets.map(
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/dataset_dict.py", line 869, in map
{
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/dataset_dict.py", line 870, in <dictcomp>
k: dataset.map(
^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 602, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 567, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3248, in map
for rank, done, content in iflatmap_unordered(
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 674, in iflatmap_unordered
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 674, in <listcomp>
[async_result.get(timeout=0.05) for async_result in async_results]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/multiprocess/pool.py", line 774, in get
raise self._value
File "/home/user/anaconda3/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 634, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3517, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3416, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 669, in prepare_dataset
batch["input_features"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_features[0]
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py", line 99, in __call__
inputs = self.feature_extractor(audio, sampling_rate=sampling_rate, **kwargs)
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in __call__
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in <listcomp>
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 128, in _extract_fbank_features
features = spectrogram(
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/audio_utils.py", line 532, in spectrogram
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
^^^^^^^^^^^^^^^^^
ValueError: negative dimensions are not allowed
The following is what my prepare_dataset function looks like.
def prepare_dataset(batch):
    """Convert one raw example into model-ready features and label ids.

    Reads ``batch["audio"]`` (its ``"array"`` and ``"sampling_rate"`` entries)
    and ``batch["target_text"]``, then stores ``"input_features"``,
    ``"input_length"`` and ``"labels"`` on the same mapping.

    Args:
        batch: A single example; it is mutated in place.

    Returns:
        The same ``batch`` object with the three new keys set.

    Raises:
        ValueError: If feature extraction fails. The original error is kept
            as ``__cause__`` and the message reports the clip's sample count
            and sampling rate so the offending example can be located.
    """
    audio = batch["audio"]
    samples = audio["array"]
    sampling_rate = audio["sampling_rate"]

    try:
        # `processor` is resolved from the enclosing scope.
        model_inputs = processor(samples, sampling_rate=sampling_rate)
    except ValueError as err:
        # A bare "negative dimensions are not allowed" raised while the
        # spectrogram buffer is allocated does not say which clip failed;
        # attach the clip size before re-raising.
        raise ValueError(
            f"feature extraction failed for a clip with {len(samples)} samples "
            f"at {sampling_rate} Hz ({err}); the clip may be empty or too short "
            "to process - consider filtering such examples out before mapping"
        ) from err

    batch["input_features"] = model_inputs.input_features[0]
    batch["input_length"] = len(batch["input_features"])
    batch["labels"] = processor(text=batch["target_text"]).input_ids
    return batch
# Preprocess every split inside the `main_process_first` context.
with training_args.main_process_first(desc="dataset map preprocessing"):
    # The columns to remove are taken from the first split's column names;
    # presumably all splits share the same columns - verify against the data.
    first_split = next(iter(raw_datasets.values()))
    vectorized_datasets = raw_datasets.map(
        prepare_dataset,
        remove_columns=first_split.column_names,
        num_proc=num_workers,
        desc="preprocess datasets",
    )
I've even tried the training script from: https://lightning.ai/pashanitw/studios/w2v-bert-2-0-asr-finetuning
The following is what that script's preprocess_dataset function looks like:
def preprocess_dataset(batch, processor):
    """Convert one raw example into features, label ids and a duration.

    Reads ``batch["audio"]`` (its ``"array"`` and ``"sampling_rate"`` entries)
    and ``batch["sentence"]``, then stores ``"input_features"``,
    ``"input_length"``, ``"length_in_seconds"`` and ``"labels"`` on the same
    mapping.

    Args:
        batch: A single example; it is mutated in place.
        processor: Callable invoked as ``processor(array, sampling_rate=...)``
            for audio (result must expose ``input_features``) and as
            ``processor(text=...)`` for text (result must expose
            ``input_ids``).

    Returns:
        The same ``batch`` object with the four new keys set.

    Raises:
        ValueError: If feature extraction fails. The original error is kept
            as ``__cause__`` and the message reports the clip's sample count,
            sampling rate and duration so the offending example can be
            located.
    """
    audio = batch["audio"]
    samples = audio["array"]
    sampling_rate = audio["sampling_rate"]
    num_samples = len(samples)
    audio_length_seconds = num_samples / sampling_rate

    try:
        model_inputs = processor(samples, sampling_rate=sampling_rate)
    except ValueError as err:
        # A bare "negative dimensions are not allowed" raised while the
        # spectrogram buffer is allocated does not say which clip failed;
        # attach the clip size before re-raising.
        raise ValueError(
            f"feature extraction failed for a clip with {num_samples} samples "
            f"at {sampling_rate} Hz ({audio_length_seconds:.4f} s) ({err}); "
            "the clip may be empty or too short to process - consider "
            "filtering such examples out before mapping"
        ) from err

    batch["input_features"] = model_inputs.input_features[0]
    batch["input_length"] = len(batch["input_features"])
    batch["length_in_seconds"] = audio_length_seconds
    batch["labels"] = processor(text=batch["sentence"]).input_ids
    return batch
But sadly, I get the same error as before:
ValueError: negative dimensions are not allowed