Gregor committed on
Commit
8fd9ae0
·
verified ·
1 Parent(s): e4c89d7

Upload 2 files

Browse files
configuration_centurio.py CHANGED
@@ -37,7 +37,7 @@ class CenturioConfig(PretrainedConfig):
37
  ignore_index=-100,
38
  image_token_index=32000,
39
  adapter_type="multiscale-pool",
40
- adapter_config=None,
41
  **kwargs,
42
  ):
43
  self.ignore_index = ignore_index
 
37
  ignore_index=-100,
38
  image_token_index=32000,
39
  adapter_type="multiscale-pool",
40
+ adapter_config=dict(),
41
  **kwargs,
42
  ):
43
  self.ignore_index = ignore_index
modeling_centurio.py CHANGED
@@ -74,7 +74,7 @@ class LlavaMultiModalAdapter(nn.Module):
74
  class WindowMLPProjector(nn.Module):
75
  def __init__(self, config: LlavaConfig):
76
  super().__init__()
77
- self.multi_scale = getattr(config, "adapter_multi_scale", 2)
78
  self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
79
  self.act = ACT2FN["gelu"]
80
  self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True)
@@ -93,7 +93,7 @@ class WindowMLPProjector(nn.Module):
93
  class WindowPoolProjector(nn.Module):
94
  def __init__(self, config: LlavaConfig):
95
  super().__init__()
96
- self.multi_scale = getattr(config, "adapter_multi_scale", 2)
97
  self.pool = nn.AdaptiveAvgPool2d(getattr(config, "adapter_pool", 8))
98
  self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
99
  self.act = ACT2FN["gelu"]
@@ -119,7 +119,7 @@ class WindowPoolProjector(nn.Module):
119
  class WindowShuffelProjector(nn.Module):
120
  def __init__(self, config: LlavaConfig):
121
  super().__init__()
122
- self.multi_scale = getattr(config, "adapter_multi_scale", 2)
123
  self.scale_factor = getattr(config, "adapter_pool", 2)
124
  self.pixel_unshuffel = nn.PixelUnshuffle(self.scale_factor)
125
  self.linear_1 = nn.Linear(config.image_hidden_size*(self.scale_factor**2), config.text_config.hidden_size, bias=True)
@@ -148,7 +148,7 @@ class MultiscalePoolProjector(nn.Module):
148
  def __init__(self, config: LlavaConfig):
149
  super().__init__()
150
 
151
- self.multi_scale = getattr(config, "adapter_multi_scale", 2)
152
  self.pool = nn.AvgPool2d(self.multi_scale)
153
  self.linear_1 = nn.Linear(config.image_hidden_size*2, config.text_config.hidden_size, bias=True)
154
  self.act = ACT2FN["gelu"]
@@ -181,7 +181,7 @@ class MultiscaleShuffleProjector(nn.Module):
181
  def __init__(self, config):
182
  super().__init__()
183
 
184
- self.multi_scale = getattr(config, "adapter_multi_scale", 2)
185
  self.shuffle = nn.PixelUnshuffle(self.multi_scale)
186
 
187
  inc, ouc = config.image_hidden_size*(1+self.multi_scale**2), config.text_config.hidden_size
@@ -447,7 +447,8 @@ class CenturioForConditionalGeneration(LlavaPreTrainedModel):
447
  self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
448
  self.post_init()
449
 
450
-
 
451
 
452
  def get_input_embeddings(self):
453
  return self.language_model.get_input_embeddings()
 
74
  class WindowMLPProjector(nn.Module):
75
  def __init__(self, config: LlavaConfig):
76
  super().__init__()
77
+ self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
78
  self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
79
  self.act = ACT2FN["gelu"]
80
  self.linear_2 = nn.Linear(config.text_config.hidden_size, config.text_config.hidden_size, bias=True)
 
93
  class WindowPoolProjector(nn.Module):
94
  def __init__(self, config: LlavaConfig):
95
  super().__init__()
96
+ self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
97
  self.pool = nn.AdaptiveAvgPool2d(getattr(config, "adapter_pool", 8))
98
  self.linear_1 = nn.Linear(config.image_hidden_size, config.text_config.hidden_size, bias=True)
99
  self.act = ACT2FN["gelu"]
 
119
  class WindowShuffelProjector(nn.Module):
120
  def __init__(self, config: LlavaConfig):
121
  super().__init__()
122
+ self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
123
  self.scale_factor = getattr(config, "adapter_pool", 2)
124
  self.pixel_unshuffel = nn.PixelUnshuffle(self.scale_factor)
125
  self.linear_1 = nn.Linear(config.image_hidden_size*(self.scale_factor**2), config.text_config.hidden_size, bias=True)
 
148
  def __init__(self, config: LlavaConfig):
149
  super().__init__()
150
 
151
+ self.multi_scale = config.adapter_config.get("multi_scale", 2) #getattr(config.adapter_config, "adapter_multi_scale", 2)
152
  self.pool = nn.AvgPool2d(self.multi_scale)
153
  self.linear_1 = nn.Linear(config.image_hidden_size*2, config.text_config.hidden_size, bias=True)
154
  self.act = ACT2FN["gelu"]
 
181
  def __init__(self, config):
182
  super().__init__()
183
 
184
+ self.multi_scale = config.adapter_config.get("multi_scale", 2) #config.adapter_config.get("multi_scale")
185
  self.shuffle = nn.PixelUnshuffle(self.multi_scale)
186
 
187
  inc, ouc = config.image_hidden_size*(1+self.multi_scale**2), config.text_config.hidden_size
 
447
  self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
448
  self.post_init()
449
 
450
+ def tie_weights(self):
451
+ return self.language_model.tie_weights()
452
 
453
  def get_input_embeddings(self):
454
  return self.language_model.get_input_embeddings()