Unable to load a model with added special token

My model was trained with an additional special token:

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

However, I am unable to load the model in the standard way, i.e.

AutoModelForCausalLM.from_pretrained(checkpoint)

The following error is reported:

RuntimeError                              Traceback (most recent call last)
Cell In[9], line 27
     24 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     25 tokenizer.add_special_tokens({'pad_token': '[PAD]'})
---> 27 model = AutoModelForCausalLM.from_pretrained(
     28     dir, torch_dtype=torch.float16,
     29     trust_remote_code=True,
     30     device_map="auto",
     31     # quantization_config=quantization_config, 
     32 )
     33 # Configuration of some generation-related settings
     34 generation_config = GenerationConfig.from_pretrained(MODEL_NAME)

File ~/miniconda3/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py:561, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    559 elif type(config) in cls._model_mapping.keys():
    560     model_class = _get_model_class(config, cls._model_mapping)
--> 561     return model_class.from_pretrained(
    562         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    563     )
    564 raise ValueError(
    565     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    566     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    567 )

File ~/miniconda3/lib/python3.11/site-packages/transformers/modeling_utils.py:3565, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
   3562     model.hf_quantizer = hf_quantizer
   3564 if _adapter_model_path is not None:
-> 3565     model.load_adapter(
   3566         _adapter_model_path,
   3567         adapter_name=adapter_name,
   3568         token=token,
   3569         adapter_kwargs=adapter_kwargs,
   3570     )
   3572 if output_loading_info:
   3573     if loading_info is None:

File ~/miniconda3/lib/python3.11/site-packages/transformers/integrations/peft.py:206, in PeftAdapterMixin.load_adapter(self, peft_model_id, adapter_name, revision, token, device_map, max_memory, offload_folder, offload_index, peft_config, adapter_state_dict, adapter_kwargs)
    203     processed_adapter_state_dict[new_key] = value
    205 # Load state dict
--> 206 incompatible_keys = set_peft_model_state_dict(self, processed_adapter_state_dict, adapter_name)
    208 if incompatible_keys is not None:
    209     # check only for unexpected keys
    210     if hasattr(incompatible_keys, "unexpected_keys") and len(incompatible_keys.unexpected_keys) > 0:

File ~/miniconda3/lib/python3.11/site-packages/peft/utils/save_and_load.py:249, in set_peft_model_state_dict(model, peft_model_state_dict, adapter_name)
    246 else:
    247     raise NotImplementedError
--> 249 load_result = model.load_state_dict(peft_model_state_dict, strict=False)
    250 if config.is_prompt_learning:
    251     model.prompt_encoder[adapter_name].embedding.load_state_dict(
    252         {"weight": peft_model_state_dict["prompt_embeddings"]}, strict=True
    253     )

File ~/miniconda3/lib/python3.11/site-packages/torch/nn/modules/module.py:2152, in Module.load_state_dict(self, state_dict, strict, assign)
   2147         error_msgs.insert(
   2148             0, 'Missing key(s) in state_dict: {}. '.format(
   2149                 ', '.join(f'"{k}"' for k in missing_keys)))
   2151 if len(error_msgs) > 0:
-> 2152     raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
   2153                        self.__class__.__name__, "\n\t".join(error_msgs)))
   2154 return _IncompatibleKeys(missing_keys, unexpected_keys)
RuntimeError: Error(s) in loading state_dict for MistralForCausalLM:
	size mismatch for lm_head.base_layer.weight: copying a param with shape torch.Size([32001, 4096]) from checkpoint, the shape in current model is torch.Size([32000, 4096]).
	size mismatch for lm_head.lora_B.default.weight: copying a param with shape torch.Size([32001, 16]) from checkpoint, the shape in current model is torch.Size([32000, 16]).

My understanding is that the model is loaded via PreTrainedModel.from_pretrained, given in this line. Somehow, the code first loads the base model with the original shape (32000 tokens) and then tries to update the weights from my checkpoint (32001 tokens), which causes the size mismatch.

Fixed the issue by following this thread: https://discuss.huggingface.co/t/loading-peft-model-from-checkpoint-leading-into-size-missmatch/71944