{ "architectures": [ "PolyverseForConditionalGeneration" ], "hidden_size": 1152, "image_token_index": 256204, "model_type": "Polyverse", "projection_dim": 1024, "sampler_tokens": 128, "text_config": { "_name_or_path": "facebook/nllb-200-distilled-1.3B", "architectures": [ "M2M100ForConditionalGeneration" ], "decoder_ffn_dim": 8192, "decoder_layerdrop": 0, "decoder_layers": 24, "encoder_ffn_dim": 8192, "encoder_layerdrop": 0, "encoder_layers": 24, "max_length": 200, "model_type": "m2m_100", "num_hidden_layers": 24, "num_image_tokens": 729, "torch_dtype": "float32", "vocab_size": 256206 }, "torch_dtype": "float32", "transformers_version": "4.44.0", "vision_config": { "_name_or_path": "google/siglip-so400m-patch14-384", "hidden_size": 1152, "image_size": 384, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14, "projection_dim": 1024 } }