remove image tokens from chatglm-6b

Files changed:
- config.json +4 -4
- configuration_chatglm.py +4 -3
- modeling_chatglm.py +14 -13
- pytorch_model-00001-of-00008.bin → pytorch_model-00001-of-00008-slim.bin +2 -2
- pytorch_model-00008-of-00008.bin → pytorch_model-00008-of-00008-slim.bin +2 -2
- pytorch_model.bin.index.json +26 -26
- tokenization_chatglm.py +6 -16
- tokenizer_config.json +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "THUDM/chatglm-6b",
+  "_name_or_path": "silver/chatglm-6b-slim",
   "architectures": [
     "ChatGLMModel"
   ],
@@ -8,8 +8,8 @@
     "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
     "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration"
   },
-  "bos_token_id": 150004,
-  "eos_token_id": 150005,
+  "bos_token_id": 130004,
+  "eos_token_id": 130005,
   "hidden_size": 4096,
   "inner_hidden_size": 16384,
   "layernorm_epsilon": 1e-05,
@@ -21,5 +21,5 @@
   "torch_dtype": "float16",
   "transformers_version": "4.23.1",
   "use_cache": true,
-  "vocab_size": 150528
+  "vocab_size": 130528
 }
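Every token-related value drops by exactly 20000, the number of image tokens removed from the icetk vocabulary. A quick standalone sanity check of that offset (a sketch, not part of the commit):

NUM_IMAGE_TOKENS = 20000  # image tokens occupied ids 0..19999 in the original vocab

# original THUDM/chatglm-6b values vs. the slim values above
original = {"vocab_size": 150528, "bos_token_id": 150004, "eos_token_id": 150005}
slim = {"vocab_size": 130528, "bos_token_id": 130004, "eos_token_id": 130005}

for key in original:
    assert original[key] - NUM_IMAGE_TOKENS == slim[key]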
configuration_chatglm.py
CHANGED
@@ -12,6 +12,7 @@ class ChatGLMConfig(PretrainedConfig):
     It is used to instantiate an ChatGLM model according to the specified arguments, defining the model
     architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
     the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.
+    We remove 20K image tokens on top of ChatGLM-6B to save memory.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used
     to control the model outputs. Read the documentation from [`PretrainedConfig`]
@@ -58,14 +59,14 @@ class ChatGLMConfig(PretrainedConfig):
 
     def __init__(
             self,
-            vocab_size=150528,
+            vocab_size=130528,
             hidden_size=4096,
             num_layers=28,
             num_attention_heads=32,
             layernorm_epsilon=1e-5,
             use_cache=False,
-            bos_token_id=150004,
-            eos_token_id=150005,
+            bos_token_id=130004,
+            eos_token_id=130005,
            pad_token_id=0,
            max_sequence_length=2048,
            inner_hidden_size=16384,
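Assuming a local clone of silver/chatglm-6b-slim so that configuration_chatglm.py is importable, the new defaults line up with config.json:

from configuration_chatglm import ChatGLMConfig

config = ChatGLMConfig()  # defaults only, no kwargs
assert config.vocab_size == 130528
assert (config.bos_token_id, config.eos_token_id) == (130004, 130005)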
modeling_chatglm.py
CHANGED
@@ -28,7 +28,7 @@ from transformers.utils import logging
 from transformers.generation.logits_process import LogitsProcessor
 from transformers.generation.utils import LogitsProcessorList
 
-from .configuration_chatglm import ChatGLMConfig
+from configuration_chatglm import ChatGLMConfig
 
 # flags required to enable jit fusion kernels
 torch._C._jit_set_profiling_mode(False)
@@ -38,12 +38,13 @@ torch._C._jit_override_can_fuse_on_gpu(True)
 
 logger = logging.get_logger(__name__)
 
-_CHECKPOINT_FOR_DOC = "THUDM/ChatGLM-6B"
+_CHECKPOINT_FOR_DOC = "silver/ChatGLM-6B"
 _CONFIG_FOR_DOC = "ChatGLM6BConfig"
 
 CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LIST = [
-    "THUDM/chatglm-6b",
+    "silver/chatglm-6b-slim",
     # See all ChatGLM-6B models at https://huggingface.co/models?filter=chatglm
+    # See the slim model at https://huggingface.co/silver/chatglm-6b-slim
 ]
 
 
@@ -51,7 +52,7 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
         if torch.isnan(scores).any() or torch.isinf(scores).any():
             scores.zero_()
-            scores[..., 20005] = 5e4
+            scores[..., 5] = 5e4
         return scores
 
 
@@ -755,7 +756,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
 
     @staticmethod
     def get_masks(seq, device):
-        context_length = seq.index(150004) + 1
+        context_length = seq.index(130004) + 1
 
         attention_mask = torch.ones((1, len(seq), len(seq)), device=device)
         attention_mask.tril_()
@@ -766,9 +767,9 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
         return attention_mask
 
     def get_position_ids(self, seq, mask_position, device, gmask=False):
-        context_length = seq.index(150004) + 1
+        context_length = seq.index(130004) + 1
         if self.position_encoding_2d:
-            seq_length = seq.index(150004)
+            seq_length = seq.index(130004)
             position_ids = torch.arange(context_length, dtype=torch.long, device=device)
             if not gmask:
                 position_ids[seq_length:] = mask_position
@@ -824,7 +825,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
         if past_key_values is None:
             past_key_values = tuple([None] * len(self.layers))
 
-        MASK, gMASK = 150000, 150001
+        MASK, gMASK = 130000, 130001
         mask_token = MASK if MASK in input_ids else gMASK
         use_gmask = False if MASK in input_ids else gMASK
         seq = input_ids[0].tolist()
@@ -941,7 +942,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         attention_mask = (attention_mask < 0.5).bool()
 
         if self.position_encoding_2d:
-            seq_length = seq.index(150004)
+            seq_length = seq.index(130004)
             position_ids = torch.arange(context_length, dtype=torch.long, device=device)
             if not gmask:
                 position_ids[seq_length:] = mask_position
@@ -968,7 +969,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         **kwargs
     ) -> dict:
 
-        MASK, gMASK = 150000, 150001
+        MASK, gMASK = 130000, 130001
         mask_token = MASK if MASK in input_ids else gMASK
         use_gmask = False if MASK in input_ids else gMASK
         seq = input_ids[0].tolist()
@@ -979,7 +980,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
 
         # only last token for input_ids if past is not None
         if past is not None or past_key_values is not None:
-            context_length = seq.index(150004)
+            context_length = seq.index(130004)
             last_token = input_ids[:, -1].unsqueeze(-1)
             if self.position_encoding_2d:
                 position_ids = torch.tensor([[[mask_position], [len(seq) - context_length]]], dtype=torch.long,
@@ -1119,8 +1120,8 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         self,
         **kwargs,
     ):
-        MASK, gMASK = 150000, 150001
-        bos, eos = 150004, 150005
+        MASK, gMASK = 130000, 130001
+        bos, eos = 130004, 130005
 
         if "eos_token_id" not in kwargs:
             kwargs["eos_token_id"] = eos
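Every hard-coded id in the modeling code (MASK, gMASK, bos, eos) shifts down by the 20000 removed image tokens, so ids produced for the original model can be translated with a simple offset. A hypothetical helper, not part of the commit:

import torch

NUM_IMAGE_TOKENS = 20000  # ids 0..19999 were image tokens in the original vocab

def remap_to_slim(input_ids: torch.Tensor) -> torch.Tensor:
    """Map ids from the original 150528-token vocab to the slim 130528-token one."""
    if (input_ids < NUM_IMAGE_TOKENS).any():
        raise ValueError("image-token ids have no equivalent in the slim vocab")
    return input_ids - NUM_IMAGE_TOKENS

assert remap_to_slim(torch.tensor([150000, 150004])).tolist() == [130000, 130004]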
pytorch_model-00001-of-00008.bin → pytorch_model-00001-of-00008-slim.bin
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c85647a7f3c817274a767dbee01428a9f1b3eb855cfd7849625b8ad7753e4dbf
+size 1904493208
pytorch_model-00008-of-00008.bin → pytorch_model-00008-of-00008-slim.bin
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:36e8039413913b7326c4fc5fcbcd2bf4c03b03bea1ff1bdaf6b74b46df0053e9
+size 1233126329
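Both renamed shards are Git LFS pointer files: three lines recording the spec version, the SHA-256 of the real weight file, and its size in bytes (the old oid/size values were lost in extraction above). A sketch of how such a pointer can be regenerated locally, standard library only:

import hashlib
import os

def lfs_pointer(path: str) -> str:
    """Build a Git LFS pointer (spec v1) for a local file."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{digest.hexdigest()}\n"
        f"size {os.path.getsize(path)}\n"
    )

print(lfs_pointer("pytorch_model-00001-of-00008-slim.bin"))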
pytorch_model.bin.index.json
CHANGED
@@ -3,35 +3,35 @@
     "total_size": 13744473856
   },
   "weight_map": {
-    "lm_head.weight": "pytorch_model-00008-of-00008.bin",
+    "lm_head.weight": "pytorch_model-00008-of-00008-slim.bin",
     "transformer.final_layernorm.bias": "pytorch_model-00007-of-00008.bin",
     "transformer.final_layernorm.weight": "pytorch_model-00007-of-00008.bin",
-    "transformer.layers.0.attention.dense.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.dense.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.query_key_value.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.query_key_value.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.input_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.post_attention_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.dense.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.dense.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.query_key_value.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.query_key_value.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.input_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00008.bin",
+    "transformer.layers.0.attention.dense.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.dense.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.query_key_value.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.query_key_value.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.input_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.post_attention_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.dense.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.dense.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.query_key_value.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.query_key_value.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.attention.rotary_emb.inv_freq": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.input_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
     "transformer.layers.1.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00008.bin",
     "transformer.layers.1.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00008.bin",
-    "transformer.layers.1.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.post_attention_layernorm.bias": "pytorch_model-00001-of-00008.bin",
-    "transformer.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00008.bin",
+    "transformer.layers.1.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.post_attention_layernorm.bias": "pytorch_model-00001-of-00008-slim.bin",
+    "transformer.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00008-slim.bin",
     "transformer.layers.10.attention.dense.bias": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.10.attention.dense.weight": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.10.attention.query_key_value.bias": "pytorch_model-00003-of-00008.bin",
@@ -370,6 +370,6 @@
     "transformer.layers.9.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.9.post_attention_layernorm.bias": "pytorch_model-00003-of-00008.bin",
     "transformer.layers.9.post_attention_layernorm.weight": "pytorch_model-00003-of-00008.bin",
-    "transformer.word_embeddings.weight": "pytorch_model-00001-of-00008.bin"
+    "transformer.word_embeddings.weight": "pytorch_model-00001-of-00008-slim.bin"
   }
 }
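Only shards 1 and 8 are renamed because they are the only ones holding the vocab-sized tensors (transformer.word_embeddings.weight and lm_head.weight); the weight map just redirects every tensor stored in those two files. A hypothetical script for this bulk rename, not part of the commit:

import json

with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

# map old shard file names to their slim replacements
renamed = {
    "pytorch_model-00001-of-00008.bin": "pytorch_model-00001-of-00008-slim.bin",
    "pytorch_model-00008-of-00008.bin": "pytorch_model-00008-of-00008-slim.bin",
}
index["weight_map"] = {name: renamed.get(shard, shard)
                       for name, shard in index["weight_map"].items()}

with open("pytorch_model.bin.index.json", "w") as f:
    json.dump(index, f, indent=2)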
tokenization_chatglm.py
CHANGED
@@ -16,7 +16,7 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
-    "THUDM/chatglm-6b": 2048,
+    "silver/chatglm-6b-slim": 2048,
 }
 
 
@@ -85,17 +85,13 @@ class SPTokenizer:
     def get_tab_token():
         return f"<|tab|>"
 
-    @property
-    def num_image_tokens(self):
-        return 20000
-
     @property
     def num_text_tokens(self):
         return self.text_tokenizer.num_tokens
 
     @property
     def num_tokens(self):
-        return self.num_image_tokens + self.num_text_tokens
+        return self.num_text_tokens
 
     @staticmethod
     def _encode_whitespaces(text: str, max_len: int = 80):
@@ -125,11 +121,11 @@ class SPTokenizer:
         if not add_dummy_prefix:
             text = "<n>" + text
         tmp = self._get_text_tokenizer(encode_special_tokens=special_tokens).encode(text)
-        tokens = [x + self.num_image_tokens for x in tmp]
+        tokens = [x for x in tmp]
         return tokens if add_dummy_prefix else tokens[2:]
 
     def decode(self, text_ids: List[int], special_tokens=False) -> str:
-        ids = [int(_id) - self.num_image_tokens for _id in text_ids]
+        ids = [int(_id) for _id in text_ids]
         ids = [_id for _id in ids if _id >= 0]
         text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
         text = text.replace("<n>", "\n")
@@ -156,15 +152,9 @@ class SPTokenizer:
 
     def __getitem__(self, x: Union[int, str]):
         if isinstance(x, int):
-            if x < self.num_image_tokens:
-                return "<image_{}>".format(x)
-            else:
-                return self.text_tokenizer.convert_id_to_token(x - self.num_image_tokens)
+            return self.text_tokenizer.convert_id_to_token(x)
         elif isinstance(x, str):
-            if x.startswith("<image_") and x.endswith(">") and x[7:-1].isdigit():
-                return int(x[7:-1])
-            else:
-                return self.text_tokenizer.convert_token_to_id(x) + self.num_image_tokens
+            return self.text_tokenizer.convert_token_to_id(x)
         else:
             raise ValueError("The key should be str or int.")
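With the <image_*> handling gone, ids map one-to-one onto the SentencePiece vocabulary. The matching weight surgery behind the two renamed shards is presumably just dropping the first 20000 rows of the two vocab-sized tensors; the commit does not include the conversion script, so this is a guessed reconstruction:

import torch

NUM_IMAGE_TOKENS = 20000  # rows 0..19999 belonged to image tokens

# shard 1 holds transformer.word_embeddings.weight, shape [150528, 4096]
shard1 = torch.load("pytorch_model-00001-of-00008.bin", map_location="cpu")
emb = shard1["transformer.word_embeddings.weight"]
shard1["transformer.word_embeddings.weight"] = emb[NUM_IMAGE_TOKENS:].clone()
torch.save(shard1, "pytorch_model-00001-of-00008-slim.bin")

# shard 8 holds lm_head.weight, slimmed the same way
shard8 = torch.load("pytorch_model-00008-of-00008.bin", map_location="cpu")
shard8["lm_head.weight"] = shard8["lm_head.weight"][NUM_IMAGE_TOKENS:].clone()
torch.save(shard8, "pytorch_model-00008-of-00008-slim.bin")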
tokenizer_config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "name_or_path": "THUDM/chatglm-6b",
+  "name_or_path": "silver/chatglm-6b-slim",
   "bos_token": "<sop>",
   "eop_token": "<eop>",
   "eos_token": "</s>",
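After these changes the slim checkpoint should load like the original; an untested sketch via the usual transformers remote-code path:

from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("silver/chatglm-6b-slim", trust_remote_code=True)
model = AutoModel.from_pretrained("silver/chatglm-6b-slim", trust_remote_code=True).half().cuda()

response, history = model.chat(tokenizer, "Hello", history=[])
print(response)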