# coding=utf-8
from transformers.configuration_utils import PretrainedConfig
from transformers import logging, AutoConfig
from transformers import CONFIG_MAPPING

logger = logging.get_logger(__name__)


class CenturioConfig(PretrainedConfig):
    r"""
    Configuration for a Centurio model. Based on LlavaConfig.

    Stores the vision-model name, the text backbone configuration and the
    adapter settings as attributes; any remaining keyword arguments are
    forwarded to `PretrainedConfig.__init__`.

    Args:
        timm_model (`str`, *optional*, defaults to `vit_so400m_patch14_siglip_384`):
            Name of the timm model used as the vision backbone.
        image_hidden_size (`int`, *optional*, defaults to 1024):
            Stored as-is on the config. Presumably the hidden size of the image
            features produced by the vision backbone -- confirm against the
            modeling code.
        text_config (`Union[AutoConfig, dict]`, *optional*, defaults to `LlamaConfig`):
            The config object or dictionary of the text backbone. A dictionary
            is turned into the config class registered in `CONFIG_MAPPING` under
            its `model_type` key (`"llama"` when the key is absent). `None`
            yields a default Llama config. Any other value is stored unchanged.
        ignore_index (`int`, *optional*, defaults to -100):
            The ignore index for the loss function.
        image_token_index (`int`, *optional*, defaults to 32000):
            The image token index to encode the image prompt.
        adapter_type (`str`, *optional*, defaults to `multiscale-pool`):
            The adapter type.
        adapter_config (`dict`, *optional*, defaults to `None`):
            Extra settings for the adapter, stored as-is (for example
            `dict(adapter_multi_scale=2)`).
    """

    model_type = "centurio"
    is_composition = True

    def __init__(
        self,
        timm_model="vit_so400m_patch14_siglip_384",
        image_hidden_size=1024,
        text_config=None,
        ignore_index=-100,
        image_token_index=32000,
        adapter_type="multiscale-pool",
        adapter_config=None,
        **kwargs,
    ):
        self.ignore_index = ignore_index
        self.image_token_index = image_token_index
        self.adapter_type = adapter_type
        self.adapter_config = adapter_config
        self.timm_model = timm_model
        self.image_hidden_size = image_hidden_size

        if isinstance(text_config, dict):
            # Work on a shallow copy so the caller's dictionary is not modified
            # when the default model type is filled in.
            text_config = dict(text_config)
            text_config.setdefault("model_type", "llama")
            text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
        elif text_config is None:
            text_config = CONFIG_MAPPING["llama"]()
        self.text_config = text_config

        super().__init__(**kwargs)


# Example: building and saving configs for two different text backbones.
#
# q = CenturioConfig(
#     text_config=AutoConfig.from_pretrained("Qwen/Qwen2.5-7B-Instruct"),
#     image_token_index=151665,
#     adapter_type="multiscale-pool",
#     adapter_config=dict(adapter_multi_scale=2),
#     attn_implementation="flash_attention_2"
# )
# q.save_pretrained("centurio_qwen")
#
# a = CenturioConfig(
#     text_config=AutoConfig.from_pretrained("CohereForAI/aya-expanse-8b"),
#     image_token_index=255029,
#     adapter_type="multiscale-pool",
#     adapter_config=dict(adapter_multi_scale=2),
#     attn_implementation="flash_attention_2"
# )
# a.save_pretrained("centurio_aya")