|
from transformers import AutoConfig, PretrainedConfig |
|
class FloSmolVConfig(PretrainedConfig):
    """Composite configuration holding a vision config and an LLM config.

    Args:
        vision_config: Configuration of the vision component. Accepts a
            config object, a plain ``dict`` carrying a ``"model_type"`` key,
            or ``None`` to load the configuration published under
            ``microsoft/Florence-2-base``.
        llm_config: Configuration of the language-model component. Accepts
            the same forms; ``None`` loads the configuration published under
            ``HuggingFaceTB/SmolLM-360M-Instruct``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "flosmolV"

    def __init__(self, vision_config=None, llm_config=None, **kwargs):
        super().__init__(**kwargs)

        # Resolve both sub-configs before assigning, vision first, so the
        # default lookups happen in the same order as before.
        resolved_vision = self._resolve_sub_config(
            vision_config, "microsoft/Florence-2-base"
        )
        resolved_llm = self._resolve_sub_config(
            llm_config, "HuggingFaceTB/SmolLM-360M-Instruct"
        )

        self.vision_config = resolved_vision
        self.llm_config = resolved_llm

    @staticmethod
    def _resolve_sub_config(sub_config, default_repo):
        """Return a usable sub-configuration for one component.

        Args:
            sub_config: ``None``, a ``dict``, or an already-built config.
            default_repo: Repository id to load from when ``sub_config`` is
                ``None``.

        Returns:
            The config loaded from ``default_repo`` when ``sub_config`` is
            ``None``; a config object built from the dict when its
            ``"model_type"`` is known to ``AutoConfig``; otherwise
            ``sub_config`` unchanged.
        """
        if sub_config is None:
            # NOTE(security): trust_remote_code=True lets the referenced
            # repository supply Python code that is executed locally; keep it
            # only for repositories you trust.
            return AutoConfig.from_pretrained(default_repo, trust_remote_code=True)

        if isinstance(sub_config, dict) and "model_type" in sub_config:
            # A plain dict is presumably what arrives when this config is
            # rebuilt from its serialized form -- verify against the
            # transformers version in use. Build a fresh kwargs dict so the
            # caller's dict is never mutated.
            init_kwargs = {
                key: value
                for key, value in sub_config.items()
                if key != "model_type"
            }
            try:
                return AutoConfig.for_model(sub_config["model_type"], **init_kwargs)
            except ValueError:
                # Model type not registered with AutoConfig (e.g. a
                # remote-code-only architecture): keep the dict as given,
                # which is what happened before dict inputs were rehydrated.
                return sub_config

        return sub_config