My model was fine-tuned after adding an extra token to the tokenizer:
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})  # vocab grows from 32000 to 32001
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model.resize_token_embeddings(len(tokenizer))  # resize embeddings/lm_head to the new vocab size
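Fine-tuning then went through a PEFT LoRA adapter; judging from the shapes in the error below (lm_head.base_layer plus a rank-16 lm_head.lora_B), the setup was roughly the following sketch, where the other target modules and the output path are assumptions:

from peft import LoraConfig, get_peft_model

checkpoint = "ft-checkpoint"  # placeholder output directory

# Sketch only: r=16 and lm_head as a target module are inferred from
# the error below; q_proj/v_proj are assumptions.
lora_config = LoraConfig(
    r=16,
    target_modules=["q_proj", "v_proj", "lm_head"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
# ... training loop ...
model.save_pretrained(checkpoint)  # writes adapter_config.json + adapter weights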
However, I am unable to load the resulting checkpoint in the standard way, i.e.
AutoModelForCausalLM.from_pretrained(checkpoint)
The following error is reported:
RuntimeError Traceback (most recent call last)
Cell In[9], line 27
24 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
25 tokenizer.add_special_tokens({'pad_token': '[PAD]'})
---> 27 model = AutoModelForCausalLM.from_pretrained(
28 dir, torch_dtype=torch.float16,
29 trust_remote_code=True,
30 device_map="auto",
31 # quantization_config=quantization_config,
32 )
33 # Configuration of some generation-related settings
34 generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
File ~/miniconda3/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py:561, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
559 elif type(config) in cls._model_mapping.keys():
560 model_class = _get_model_class(config, cls._model_mapping)
--> 561 return model_class.from_pretrained(
562 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
563 )
564 raise ValueError(
565 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
566 f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
567 )
File ~/miniconda3/lib/python3.11/site-packages/transformers/modeling_utils.py:3565, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3562 model.hf_quantizer = hf_quantizer
3564 if _adapter_model_path is not None:
-> 3565 model.load_adapter(
3566 _adapter_model_path,
3567 adapter_name=adapter_name,
3568 token=token,
3569 adapter_kwargs=adapter_kwargs,
3570 )
3572 if output_loading_info:
3573 if loading_info is None:
File ~/miniconda3/lib/python3.11/site-packages/transformers/integrations/peft.py:206, in PeftAdapterMixin.load_adapter(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index, peft_config, adapter_state_dict, adapter_kwargs)
203 processed_adapter_state_dict[new_key] = value
205 # Load state dict
--> 206 incompatible_keys = set_peft_model_state_dict(self, processed_adapter_state_dict, adapter_name)
208 if incompatible_keys is not None:
209 # check only for unexpected keys
210 if hasattr(incompatible_keys, "unexpected_keys") and len(incompatible_keys.unexpected_keys) > 0:
File ~/miniconda3/lib/python3.11/site-packages/peft/utils/save_and_load.py:249, in set_peft_model_state_dict(model, peft_model_state_dict, adapter_name)
246 else:
247 raise NotImplementedError
--> 249 load_result = model.load_state_dict(peft_model_state_dict, strict=False)
250 if config.is_prompt_learning:
251 model.prompt_encoder[adapter_name].embedding.load_state_dict(
252 {"weight": peft_model_state_dict["prompt_embeddings"]}, strict=True
253 )
File ~/miniconda3/lib/python3.11/site-packages/torch/nn/modules/module.py:2152, in Module.load_state_dict(self, state_dict, strict, assign)
2147 error_msgs.insert(
2148 0, 'Missing key(s) in state_dict: {}. '.format(
2149 ', '.join(f'"{k}"' for k in missing_keys)))
2151 if len(error_msgs) > 0:
-> 2152 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
2153 self.__class__.__name__, "\n\t".join(error_msgs)))
2154 return _IncompatibleKeys(missing_keys, unexpected_keys)
RuntimeError: Error(s) in loading state_dict for MistralForCausalLM:
size mismatch for lm_head.base_layer.weight: copying a param with shape torch.Size([32001, 4096]) from checkpoint, the shape in current model is torch.Size([32000, 4096]).
size mismatch for lm_head.lora_B.default.weight: copying a param with shape torch.Size([32001, 16]) from checkpoint, the shape in current model is torch.Size([32000, 16]).
My understanding is that the model is ultimately loaded via PreTrainedModel.from_pretrained (the modeling_utils.py frame in the traceback). The code apparently first builds the base model with its original 32000-token vocabulary, and only then tries to overwrite the weights with those from the checkpoint, whose lm_head was saved after resizing to 32001 tokens, hence the size mismatch.
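If that reading is right, resizing the base model before attaching the adapter should make the shapes line up. A minimal sketch of that workaround, assuming `checkpoint` contains only the adapter files (the path is a placeholder):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
checkpoint = "ft-checkpoint"  # placeholder adapter directory

# Rebuild the tokenizer exactly as during training (vocab = 32001)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load the base model and resize *before* loading the adapter,
# so lm_head is already [32001, 4096] when the adapter weights arrive
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
)
model.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(model, checkpoint)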