from transformers import AutoConfig, PretrainedConfig


def _resolve_sub_config(sub_config, default_checkpoint):
    """Return a usable sub-configuration for ``FloSmolVConfig``.

    Args:
        sub_config: ``None``, a plain ``dict`` (the form nested configs take
            after being serialized to ``config.json``), or an already-built
            config object.
        default_checkpoint: Hub identifier loaded when ``sub_config`` is
            ``None``.

    Returns:
        A config object when one can be built; otherwise ``sub_config``
        unchanged.
    """
    if sub_config is None:
        # NOTE: this hits the Hub (or the local cache), and
        # trust_remote_code=True allows code shipped with the checkpoint
        # repository to run locally.
        return AutoConfig.from_pretrained(default_checkpoint, trust_remote_code=True)

    if isinstance(sub_config, dict):
        # Work on a copy so the caller's dict is never mutated.
        params = dict(sub_config)
        sub_model_type = params.pop("model_type", None)
        try:
            return AutoConfig.for_model(sub_model_type, **params)
        except (KeyError, TypeError, ValueError):
            # Best effort only: model types that are not registered with
            # AutoConfig (e.g. remote-code models), or whose registered class
            # rejects these parameters, cannot be rebuilt here. Keep the dict
            # exactly as it was passed in, which is the previous behaviour.
            return sub_config

    return sub_config


class FloSmolVConfig(PretrainedConfig):
    """Composite configuration pairing a vision config with an LLM config.

    Args:
        vision_config: Configuration of the vision model. May be a config
            object or a ``dict`` as found in a serialized ``config.json``.
            When ``None``, the configuration of ``microsoft/Florence-2-base``
            is loaded.
        llm_config: Configuration of the language model, accepted in the same
            forms. When ``None``, the configuration of
            ``HuggingFaceTB/SmolLM-360M-Instruct`` is loaded.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "flosmolV"
    is_composition = True

    def __init__(
        self,
        vision_config=None,
        llm_config=None,
        **kwargs,
    ):
        # Nested configs are written to disk as plain dicts, so a reload
        # passes dicts here; turn them back into config objects when possible.
        vision_config = _resolve_sub_config(vision_config, "microsoft/Florence-2-base")
        llm_config = _resolve_sub_config(llm_config, "HuggingFaceTB/SmolLM-360M-Instruct")

        self.vision_config = vision_config
        self.llm_config = llm_config

        super().__init__(**kwargs)