openvino-ci committed on
Commit
ac7e290
·
verified ·
1 Parent(s): ba29ced

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -14,8 +14,8 @@ This is [Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-
14
  Weight compression was performed using `nncf.compress_weights` with the following parameters:
15
 
16
  * mode: **int4_asym**
17
- * group_size: **128**
18
- * ratio: **0.8**
19
 
20
  For more information on quantization, check the [OpenVINO model optimization guide](https://docs.openvino.ai/2024/openvino-workflow/model-optimization-guide/weight-compression.html).
21
 
@@ -24,11 +24,10 @@ For more information on quantization, check the [OpenVINO model optimization gui
24
 
25
  The provided OpenVINO™ IR model is compatible with:
26
 
27
- * OpenVINO version 2024.2.0 and higher
28
- * Optimum Intel 1.18.0 and higher
29
-
30
- ## Running Model Inference with [Optimum Intel](https://huggingface.co/docs/optimum/intel/index)
31
 
 
32
 
33
  1. Install packages required for using [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) integration with the OpenVINO backend:
34
 
@@ -55,40 +54,9 @@ print(text)
55
 
56
  For more examples and possible optimizations, refer to the [OpenVINO Large Language Model Inference Guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).
57
 
58
- ## Running Model Inference with [OpenVINO GenAI](https://github.com/openvinotoolkit/openvino.genai)
59
-
60
- 1. Install packages required for using OpenVINO GenAI.
61
- ```
62
- pip install openvino-genai huggingface_hub
63
- ```
64
-
65
- 2. Download model from HuggingFace Hub
66
-
67
- ```
68
- import huggingface_hub as hf_hub
69
-
70
- model_id = "OpenVINO/Phi-3-mini-4k-instruct-int4-ov"
71
- model_path = "Phi-3-mini-4k-instruct-int4-ov"
72
-
73
- hf_hub.snapshot_download(model_id, local_dir=model_path)
74
-
75
- ```
76
-
77
- 3. Run model inference:
78
-
79
- ```
80
- import openvino_genai as ov_genai
81
-
82
- device = "CPU"
83
- pipe = ov_genai.LLMPipeline(model_path, device)
84
- print(pipe.generate("What is OpenVINO?", max_length=200))
85
- ```
86
-
87
- More GenAI usage examples can be found in OpenVINO GenAI library [docs](https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md) and [samples](https://github.com/openvinotoolkit/openvino.genai?tab=readme-ov-file#openvino-genai-samples)
88
-
89
  ## Limitations
90
 
91
- Check the original model card for [limitations](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct#responsible-ai-considerations).
92
 
93
  ## Legal information
94
 
@@ -96,4 +64,4 @@ The original model is distributed under [mit](https://choosealicense.com/license
96
 
97
  ## Disclaimer
98
 
99
- Intel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights.
 
14
  Weight compression was performed using `nncf.compress_weights` with the following parameters:
15
 
16
  * mode: **int4_asym**
17
+ * ratio: **1**
18
+ * group_size: **64**
19
 
20
  For more information on quantization, check the [OpenVINO model optimization guide](https://docs.openvino.ai/2024/openvino-workflow/model-optimization-guide/weight-compression.html).
21
 
 
24
 
25
  The provided OpenVINO™ IR model is compatible with:
26
 
27
+ * OpenVINO version 2024.4.0 and higher
28
+ * Optimum Intel 1.23.1 and higher
 
 
29
 
30
+ ## Running Model Inference
31
 
32
  1. Install packages required for using [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) integration with the OpenVINO backend:
33
 
 
54
 
55
  For more examples and possible optimizations, refer to the [OpenVINO Large Language Model Inference Guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html).
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ## Limitations
58
 
59
+ Check the [original model card](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) for limitations.
60
 
61
  ## Legal information
62
 
 
64
 
65
  ## Disclaimer
66
 
67
+ Intel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights.
config.json CHANGED
@@ -3,6 +3,7 @@
3
  "architectures": [
4
  "Phi3ForCausalLM"
5
  ],
 
6
  "attention_dropout": 0.0,
7
  "auto_map": {
8
  "AutoConfig": "configuration_phi3.Phi3Config",
@@ -28,7 +29,8 @@
28
  "rope_theta": 10000.0,
29
  "sliding_window": 2047,
30
  "tie_word_embeddings": false,
31
- "transformers_version": "4.41.2",
 
32
  "use_cache": true,
33
  "vocab_size": 32064
34
  }
 
3
  "architectures": [
4
  "Phi3ForCausalLM"
5
  ],
6
+ "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "auto_map": {
9
  "AutoConfig": "configuration_phi3.Phi3Config",
 
29
  "rope_theta": 10000.0,
30
  "sliding_window": 2047,
31
  "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.45.2",
34
  "use_cache": true,
35
  "vocab_size": 32064
36
  }
configuration_phi3.py CHANGED
@@ -1,213 +1,227 @@
1
- # coding=utf-8
2
- # Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- """ Phi-3 model configuration"""
17
-
18
-
19
- from transformers.configuration_utils import PretrainedConfig
20
- from transformers.utils import logging
21
-
22
-
23
- logger = logging.get_logger(__name__)
24
-
25
- PHI3_PRETRAINED_CONFIG_ARCHIVE_MAP = {
26
- "microsoft/Phi-3-mini-4k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/config.json",
27
- "microsoft/Phi-3-mini-128k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/config.json",
28
- }
29
-
30
-
31
- class Phi3Config(PretrainedConfig):
32
- r"""
33
- This is the configuration class to store the configuration of a [`Phi3Model`]. It is used to instantiate a Phi-3
34
- model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
35
- defaults will yield a similar configuration to that of the
36
- [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct).
37
-
38
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
39
- documentation from [`PretrainedConfig`] for more information.
40
-
41
- Args:
42
- vocab_size (`int`, *optional*, defaults to 32064):
43
- Vocabulary size of the Phi-3 model. Defines the number of different tokens that can be represented by the
44
- `inputs_ids` passed when calling [`Phi3Model`].
45
- hidden_size (`int`, *optional*, defaults to 3072):
46
- Dimension of the hidden representations.
47
- intermediate_size (`int`, *optional*, defaults to 8192):
48
- Dimension of the MLP representations.
49
- num_hidden_layers (`int`, *optional*, defaults to 32):
50
- Number of hidden layers in the Transformer decoder.
51
- num_attention_heads (`int`, *optional*, defaults to 32):
52
- Number of attention heads for each attention layer in the Transformer decoder.
53
- num_key_value_heads (`int`, *optional*):
54
- This is the number of key_value heads that should be used to implement Grouped Query Attention. If
55
- `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
56
- `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When
57
- converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
58
- by meanpooling all the original heads within that group. For more details checkout [this
59
- paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
60
- `num_attention_heads`.
61
- resid_pdrop (`float`, *optional*, defaults to 0.0):
62
- Dropout probability for mlp outputs.
63
- embd_pdrop (`int`, *optional*, defaults to 0.0):
64
- The dropout ratio for the embeddings.
65
- attention_dropout (`float`, *optional*, defaults to 0.0):
66
- The dropout ratio after computing the attention scores.
67
- hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
68
- The non-linear activation function (function or string) in the decoder.
69
- max_position_embeddings (`int`, *optional*, defaults to 4096):
70
- The maximum sequence length that this model might ever be used with.
71
- original_max_position_embeddings (`int`, *optional*, defaults to 4096):
72
- The maximum sequence length that this model was trained with. This is used to determine the size of the
73
- original RoPE embeddings when using long scaling.
74
- initializer_range (`float`, *optional*, defaults to 0.02):
75
- The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
76
- rms_norm_eps (`float`, *optional*, defaults to 1e-05):
77
- The epsilon value used for the RMSNorm.
78
- use_cache (`bool`, *optional*, defaults to `True`):
79
- Whether or not the model should return the last key/values attentions (not used by all models). Only
80
- relevant if `config.is_decoder=True`. Whether to tie weight embeddings or not.
81
- tie_word_embeddings (`bool`, *optional*, defaults to `False`):
82
- Whether to tie weight embeddings
83
- rope_theta (`float`, *optional*, defaults to 10000.0):
84
- The base period of the RoPE embeddings.
85
- rope_scaling (`dict`, *optional*):
86
- The scaling strategy for the RoPE embeddings. If `None`, no scaling is applied. If a dictionary, it must
87
- contain the following keys: `type`, `short_factor` and `long_factor`. The `type` must be either `su` or `yarn` and
88
- the `short_factor` and `long_factor` must be lists of numbers with the same length as the hidden size
89
- divided by the number of attention heads divided by 2.
90
- bos_token_id (`int`, *optional*, defaults to 1):
91
- The id of the "beginning-of-sequence" token.
92
- eos_token_id (`int`, *optional*, defaults to 32000):
93
- The id of the "end-of-sequence" token.
94
- pad_token_id (`int`, *optional*, defaults to 32000):
95
- The id of the padding token.
96
- sliding_window (`int`, *optional*):
97
- Sliding window attention window size. If `None`, no sliding window is applied.
98
-
99
- Example:
100
-
101
- ```python
102
- >>> from transformers import Phi3Model, Phi3Config
103
-
104
- >>> # Initializing a Phi-3 style configuration
105
- >>> configuration = Phi3Config.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
106
-
107
- >>> # Initializing a model from the configuration
108
- >>> model = Phi3Model(configuration)
109
-
110
- >>> # Accessing the model configuration
111
- >>> configuration = model.config
112
- ```"""
113
-
114
- model_type = "phi3"
115
- keys_to_ignore_at_inference = ["past_key_values"]
116
-
117
- def __init__(
118
- self,
119
- vocab_size=32064,
120
- hidden_size=3072,
121
- intermediate_size=8192,
122
- num_hidden_layers=32,
123
- num_attention_heads=32,
124
- num_key_value_heads=None,
125
- resid_pdrop=0.0,
126
- embd_pdrop=0.0,
127
- attention_dropout=0.0,
128
- hidden_act="silu",
129
- max_position_embeddings=4096,
130
- original_max_position_embeddings=4096,
131
- initializer_range=0.02,
132
- rms_norm_eps=1e-5,
133
- use_cache=True,
134
- tie_word_embeddings=False,
135
- rope_theta=10000.0,
136
- rope_scaling=None,
137
- bos_token_id=1,
138
- eos_token_id=32000,
139
- pad_token_id=32000,
140
- sliding_window=None,
141
- **kwargs,
142
- ):
143
- self.vocab_size = vocab_size
144
- self.hidden_size = hidden_size
145
- self.intermediate_size = intermediate_size
146
- self.num_hidden_layers = num_hidden_layers
147
- self.num_attention_heads = num_attention_heads
148
-
149
- if num_key_value_heads is None:
150
- num_key_value_heads = num_attention_heads
151
-
152
- self.num_key_value_heads = num_key_value_heads
153
- self.resid_pdrop = resid_pdrop
154
- self.embd_pdrop = embd_pdrop
155
- self.attention_dropout = attention_dropout
156
- self.hidden_act = hidden_act
157
- self.max_position_embeddings = max_position_embeddings
158
- self.original_max_position_embeddings = original_max_position_embeddings
159
- self.initializer_range = initializer_range
160
- self.rms_norm_eps = rms_norm_eps
161
- self.use_cache = use_cache
162
- self.rope_theta = rope_theta
163
- self.rope_scaling = rope_scaling
164
- self._rope_scaling_validation()
165
- self.sliding_window = sliding_window
166
-
167
- super().__init__(
168
- bos_token_id=bos_token_id,
169
- eos_token_id=eos_token_id,
170
- pad_token_id=pad_token_id,
171
- tie_word_embeddings=tie_word_embeddings,
172
- **kwargs,
173
- )
174
-
175
- def _rope_scaling_validation(self):
176
- """
177
- Validate the `rope_scaling` configuration.
178
- """
179
- if self.rope_scaling is None:
180
- return
181
-
182
- if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 3:
183
- raise ValueError(
184
- "`rope_scaling` must be a dictionary with three fields, `type`, `short_factor` and `long_factor`, "
185
- f"got {self.rope_scaling}"
186
- )
187
- rope_scaling_type = self.rope_scaling.get("type", None)
188
- rope_scaling_short_factor = self.rope_scaling.get("short_factor", None)
189
- rope_scaling_long_factor = self.rope_scaling.get("long_factor", None)
190
- if rope_scaling_type is None or rope_scaling_type not in ["su", "yarn"]:
191
- raise ValueError(f"`rope_scaling`'s type field must be one of ['su', 'yarn'], got {rope_scaling_type}")
192
- if not (
193
- isinstance(rope_scaling_short_factor, list)
194
- and all(isinstance(x, (int, float)) for x in rope_scaling_short_factor)
195
- ):
196
- raise ValueError(
197
- f"`rope_scaling`'s short_factor field must be a list of numbers, got {rope_scaling_short_factor}"
198
- )
199
- if not len(rope_scaling_short_factor) == self.hidden_size // self.num_attention_heads // 2:
200
- raise ValueError(
201
- f"`rope_scaling`'s short_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_short_factor)}"
202
- )
203
- if not (
204
- isinstance(rope_scaling_long_factor, list)
205
- and all(isinstance(x, (int, float)) for x in rope_scaling_long_factor)
206
- ):
207
- raise ValueError(
208
- f"`rope_scaling`'s long_factor field must be a list of numbers, got {rope_scaling_long_factor}"
209
- )
210
- if not len(rope_scaling_long_factor) == self.hidden_size // self.num_attention_heads // 2:
211
- raise ValueError(
212
- f"`rope_scaling`'s long_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_long_factor)}"
213
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """ Phi-3 model configuration"""
17
+
18
+
19
+ from transformers.configuration_utils import PretrainedConfig
20
+ from transformers.utils import logging
21
+
22
+
23
+ logger = logging.get_logger(__name__)
24
+
25
+ PHI3_PRETRAINED_CONFIG_ARCHIVE_MAP = {
26
+ "microsoft/Phi-3-mini-4k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/config.json",
27
+ "microsoft/Phi-3-mini-128k-instruct": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/config.json",
28
+ }
29
+
30
+
31
+ class Phi3Config(PretrainedConfig):
32
+ r"""
33
+ This is the configuration class to store the configuration of a [`Phi3Model`]. It is used to instantiate a Phi-3
34
+ model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
35
+ defaults will yield a similar configuration to that of the
36
+ [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct).
37
+
38
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
39
+ documentation from [`PretrainedConfig`] for more information.
40
+
41
+ Args:
42
+ vocab_size (`int`, *optional*, defaults to 32064):
43
+ Vocabulary size of the Phi-3 model. Defines the number of different tokens that can be represented by the
44
+ `inputs_ids` passed when calling [`Phi3Model`].
45
+ hidden_size (`int`, *optional*, defaults to 3072):
46
+ Dimension of the hidden representations.
47
+ intermediate_size (`int`, *optional*, defaults to 8192):
48
+ Dimension of the MLP representations.
49
+ num_hidden_layers (`int`, *optional*, defaults to 32):
50
+ Number of hidden layers in the Transformer decoder.
51
+ num_attention_heads (`int`, *optional*, defaults to 32):
52
+ Number of attention heads for each attention layer in the Transformer decoder.
53
+ num_key_value_heads (`int`, *optional*):
54
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
55
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
56
+ `num_key_value_heads=1`, the model will use Multi Query Attention (MQA), otherwise GQA is used. When
57
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
58
+ by meanpooling all the original heads within that group. For more details checkout [this
59
+ paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
60
+ `num_attention_heads`.
61
+ resid_pdrop (`float`, *optional*, defaults to 0.0):
62
+ Dropout probability for mlp outputs.
63
+ embd_pdrop (`float`, *optional*, defaults to 0.0):
64
+ The dropout ratio for the embeddings.
65
+ attention_dropout (`float`, *optional*, defaults to 0.0):
66
+ The dropout ratio after computing the attention scores.
67
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
68
+ The non-linear activation function (function or string) in the decoder.
69
+ max_position_embeddings (`int`, *optional*, defaults to 4096):
70
+ The maximum sequence length that this model might ever be used with.
71
+ original_max_position_embeddings (`int`, *optional*, defaults to 4096):
72
+ The maximum sequence length that this model was trained with. This is used to determine the size of the
73
+ original RoPE embeddings when using long scaling.
74
+ initializer_range (`float`, *optional*, defaults to 0.02):
75
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
76
+ rms_norm_eps (`float`, *optional*, defaults to 1e-05):
77
+ The epsilon value used for the RMSNorm.
78
+ use_cache (`bool`, *optional*, defaults to `True`):
79
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
80
+ relevant if `config.is_decoder=True`.
81
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
82
+ Whether to tie weight embeddings
83
+ rope_theta (`float`, *optional*, defaults to 10000.0):
84
+ The base period of the RoPE embeddings.
85
+ rope_scaling (`dict`, *optional*):
86
+ The scaling strategy for the RoPE embeddings. If `None`, no scaling is applied. If a dictionary, it must
87
+ contain the following keys: `type`, `short_factor` and `long_factor`. The `type` must be `longrope` and
88
+ the `short_factor` and `long_factor` must be lists of numbers with the same length as the hidden size
89
+ divided by the number of attention heads divided by 2.
90
+ bos_token_id (`int`, *optional*, defaults to 1):
91
+ The id of the "beginning-of-sequence" token.
92
+ eos_token_id (`int`, *optional*, defaults to 32000):
93
+ The id of the "end-of-sequence" token.
94
+ pad_token_id (`int`, *optional*, defaults to 32000):
95
+ The id of the padding token.
96
+ sliding_window (`int`, *optional*):
97
+ Sliding window attention window size. If `None`, no sliding window is applied.
98
+
99
+ Example:
100
+
101
+ ```python
102
+ >>> from transformers import Phi3Model, Phi3Config
103
+
104
+ >>> # Initializing a Phi-3 style configuration
105
+ >>> configuration = Phi3Config.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
106
+
107
+ >>> # Initializing a model from the configuration
108
+ >>> model = Phi3Model(configuration)
109
+
110
+ >>> # Accessing the model configuration
111
+ >>> configuration = model.config
112
+ ```"""
113
+
114
+ model_type = "phi3"
115
+ keys_to_ignore_at_inference = ["past_key_values"]
116
+
117
+ def __init__(
118
+ self,
119
+ vocab_size=32064,
120
+ hidden_size=3072,
121
+ intermediate_size=8192,
122
+ num_hidden_layers=32,
123
+ num_attention_heads=32,
124
+ num_key_value_heads=None,
125
+ resid_pdrop=0.0,
126
+ embd_pdrop=0.0,
127
+ attention_dropout=0.0,
128
+ hidden_act="silu",
129
+ max_position_embeddings=4096,
130
+ original_max_position_embeddings=4096,
131
+ initializer_range=0.02,
132
+ rms_norm_eps=1e-5,
133
+ use_cache=True,
134
+ tie_word_embeddings=False,
135
+ rope_theta=10000.0,
136
+ rope_scaling=None,
137
+ bos_token_id=1,
138
+ eos_token_id=32000,
139
+ pad_token_id=32000,
140
+ sliding_window=None,
141
+ **kwargs,
142
+ ):
143
+ self.vocab_size = vocab_size
144
+ self.hidden_size = hidden_size
145
+ self.intermediate_size = intermediate_size
146
+ self.num_hidden_layers = num_hidden_layers
147
+ self.num_attention_heads = num_attention_heads
148
+
149
+ if num_key_value_heads is None:
150
+ num_key_value_heads = num_attention_heads
151
+
152
+ self.num_key_value_heads = num_key_value_heads
153
+ self.resid_pdrop = resid_pdrop
154
+ self.embd_pdrop = embd_pdrop
155
+ self.attention_dropout = attention_dropout
156
+ self.hidden_act = hidden_act
157
+ self.max_position_embeddings = max_position_embeddings
158
+ self.original_max_position_embeddings = original_max_position_embeddings
159
+ self.initializer_range = initializer_range
160
+ self.rms_norm_eps = rms_norm_eps
161
+ self.use_cache = use_cache
162
+ self.rope_theta = rope_theta
163
+ self.rope_scaling = rope_scaling
164
+ self._rope_scaling_adjustment()
165
+ self._rope_scaling_validation()
166
+ self.sliding_window = sliding_window
167
+
168
+ super().__init__(
169
+ bos_token_id=bos_token_id,
170
+ eos_token_id=eos_token_id,
171
+ pad_token_id=pad_token_id,
172
+ tie_word_embeddings=tie_word_embeddings,
173
+ **kwargs,
174
+ )
175
+
176
+ def _rope_scaling_adjustment(self):
177
+ """
178
+ Adjust the `type` of the `rope_scaling` configuration for backward compatibility.
179
+ """
180
+ if self.rope_scaling is None:
181
+ return
182
+
183
+ rope_scaling_type = self.rope_scaling.get("type", None)
184
+
185
+ # For backward compatibility if previous version used "su" or "yarn"
186
+ if rope_scaling_type is not None and rope_scaling_type in ["su", "yarn"]:
187
+ self.rope_scaling["type"] = "longrope"
188
+
189
+ def _rope_scaling_validation(self):
190
+ """
191
+ Validate the `rope_scaling` configuration.
192
+ """
193
+ if self.rope_scaling is None:
194
+ return
195
+
196
+ if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 3:
197
+ raise ValueError(
198
+ "`rope_scaling` must be a dictionary with three fields, `type`, `short_factor` and `long_factor`, "
199
+ f"got {self.rope_scaling}"
200
+ )
201
+ rope_scaling_type = self.rope_scaling.get("type", None)
202
+ rope_scaling_short_factor = self.rope_scaling.get("short_factor", None)
203
+ rope_scaling_long_factor = self.rope_scaling.get("long_factor", None)
204
+ if rope_scaling_type is None or rope_scaling_type not in ["longrope"]:
205
+ raise ValueError(f"`rope_scaling`'s type field must be one of ['longrope'], got {rope_scaling_type}")
206
+ if not (
207
+ isinstance(rope_scaling_short_factor, list)
208
+ and all(isinstance(x, (int, float)) for x in rope_scaling_short_factor)
209
+ ):
210
+ raise ValueError(
211
+ f"`rope_scaling`'s short_factor field must be a list of numbers, got {rope_scaling_short_factor}"
212
+ )
213
+ if not len(rope_scaling_short_factor) == self.hidden_size // self.num_attention_heads // 2:
214
+ raise ValueError(
215
+ f"`rope_scaling`'s short_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_short_factor)}"
216
+ )
217
+ if not (
218
+ isinstance(rope_scaling_long_factor, list)
219
+ and all(isinstance(x, (int, float)) for x in rope_scaling_long_factor)
220
+ ):
221
+ raise ValueError(
222
+ f"`rope_scaling`'s long_factor field must be a list of numbers, got {rope_scaling_long_factor}"
223
+ )
224
+ if not len(rope_scaling_long_factor) == self.hidden_size // self.num_attention_heads // 2:
225
+ raise ValueError(
226
+ f"`rope_scaling`'s long_factor field must have length {self.hidden_size // self.num_attention_heads // 2}, got {len(rope_scaling_long_factor)}"
227
+ )
generation_config.json CHANGED
@@ -7,5 +7,5 @@
7
  32007
8
  ],
9
  "pad_token_id": 32000,
10
- "transformers_version": "4.41.2"
11
  }
 
7
  32007
8
  ],
9
  "pad_token_id": 32000,
10
+ "transformers_version": "4.45.2"
11
  }
openvino_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compression": null,
3
+ "dtype": "int4",
4
+ "input_info": null,
5
+ "optimum_version": "1.23.1",
6
+ "quantization_config": {
7
+ "all_layers": null,
8
+ "bits": 4,
9
+ "dataset": "wikitext2",
10
+ "gptq": null,
11
+ "group_size": 64,
12
+ "ignored_scope": null,
13
+ "num_samples": null,
14
+ "quant_method": "default",
15
+ "ratio": 1.0,
16
+ "scale_estimation": true,
17
+ "sensitivity_metric": null,
18
+ "sym": false,
19
+ "tokenizer": null,
20
+ "trust_remote_code": true,
21
+ "weight_format": "int4"
22
+ },
23
+ "save_onnx_model": false,
24
+ "transformers_version": "4.45.2"
25
+ }
openvino_detokenizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:630b6806812464da49d8dc0907d303055c3fa69f10b1f3533f6945437ab55b59
3
- size 499991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abf0a5ac7698c27f1f3a8573b76a628e4a6a2c7eaddc7dd549ee3607a34d4061
3
+ size 339125
openvino_detokenizer.xml CHANGED
@@ -1,61 +1,235 @@
1
  <?xml version="1.0"?>
2
  <net name="detokenizer" version="11">
3
  <layers>
4
- <layer id="0" name="Parameter_37" type="Parameter" version="opset1">
5
  <data shape="?,?" element_type="i64" />
6
  <output>
7
- <port id="0" precision="I64" names="Parameter_37">
8
  <dim>-1</dim>
9
  <dim>-1</dim>
10
  </port>
11
  </output>
12
  </layer>
13
- <layer id="1" name="Constant_2" type="Const" version="opset1">
14
- <data element_type="u8" shape="499991" offset="0" size="499991" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  <output>
16
  <port id="0" precision="U8">
17
- <dim>499991</dim>
18
  </port>
19
  </output>
20
  </layer>
21
- <layer id="2" name="Convert_47" type="Convert" version="opset1">
22
- <data destination_type="i32" />
23
  <input>
24
- <port id="0" precision="I64">
 
 
 
 
 
25
  <dim>-1</dim>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  <dim>-1</dim>
27
  </port>
28
  </input>
29
  <output>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  <port id="1" precision="I32">
31
  <dim>-1</dim>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  <dim>-1</dim>
33
  </port>
34
  </output>
35
  </layer>
36
- <layer id="3" name="SentencepieceDetokenizer_38" type="SentencepieceDetokenizer" version="extension">
37
  <input>
38
- <port id="0" precision="U8">
39
- <dim>499991</dim>
40
  </port>
41
  <port id="1" precision="I32">
42
  <dim>-1</dim>
 
 
43
  <dim>-1</dim>
44
  </port>
45
  </input>
46
  <output>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  <port id="2" precision="I32">
48
  <dim>-1</dim>
49
  </port>
50
  <port id="3" precision="I32">
51
  <dim>-1</dim>
52
  </port>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  <port id="4" precision="U8">
 
 
 
 
 
 
 
 
 
 
 
54
  <dim>-1</dim>
55
  </port>
56
  </output>
57
  </layer>
58
- <layer id="4" name="StringTensorPack_39" type="StringTensorPack" version="extension">
59
  <data mode="begins_ends" />
60
  <input>
61
  <port id="0" precision="I32">
@@ -74,7 +248,7 @@
74
  </port>
75
  </output>
76
  </layer>
77
- <layer id="5" name="Result_40" type="Result" version="opset1">
78
  <input>
79
  <port id="0" precision="STRING">
80
  <dim>-1</dim>
@@ -83,13 +257,33 @@
83
  </layer>
84
  </layers>
85
  <edges>
86
- <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
87
- <edge from-layer="1" from-port="0" to-layer="3" to-port="0" />
88
- <edge from-layer="2" from-port="1" to-layer="3" to-port="1" />
89
- <edge from-layer="3" from-port="2" to-layer="4" to-port="0" />
90
- <edge from-layer="3" from-port="3" to-layer="4" to-port="1" />
91
- <edge from-layer="3" from-port="4" to-layer="4" to-port="2" />
92
- <edge from-layer="4" from-port="3" to-layer="5" to-port="0" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  </edges>
94
  <rt_info>
95
  <bos_token_id value="1" />
 
1
  <?xml version="1.0"?>
2
  <net name="detokenizer" version="11">
3
  <layers>
4
+ <layer id="0" name="Parameter_282016" type="Parameter" version="opset1">
5
  <data shape="?,?" element_type="i64" />
6
  <output>
7
+ <port id="0" precision="I64" names="Parameter_282016">
8
  <dim>-1</dim>
9
  <dim>-1</dim>
10
  </port>
11
  </output>
12
  </layer>
13
+ <layer id="1" name="Convert_282038" type="Convert" version="opset1">
14
+ <data destination_type="i32" />
15
+ <input>
16
+ <port id="0" precision="I64">
17
+ <dim>-1</dim>
18
+ <dim>-1</dim>
19
+ </port>
20
+ </input>
21
+ <output>
22
+ <port id="1" precision="I32">
23
+ <dim>-1</dim>
24
+ <dim>-1</dim>
25
+ </port>
26
+ </output>
27
+ </layer>
28
+ <layer id="2" name="Constant_281986" type="Const" version="opset1">
29
+ <data element_type="u8" shape="339118" offset="0" size="339118" />
30
  <output>
31
  <port id="0" precision="U8">
32
+ <dim>339118</dim>
33
  </port>
34
  </output>
35
  </layer>
36
+ <layer id="3" name="StringTensorUnpack_281987" type="StringTensorUnpack" version="extension">
37
+ <data mode="begins_ends" />
38
  <input>
39
+ <port id="0" precision="U8">
40
+ <dim>339118</dim>
41
+ </port>
42
+ </input>
43
+ <output>
44
+ <port id="1" precision="I32">
45
  <dim>-1</dim>
46
+ </port>
47
+ <port id="2" precision="I32">
48
+ <dim>-1</dim>
49
+ </port>
50
+ <port id="3" precision="U8">
51
+ <dim>-1</dim>
52
+ </port>
53
+ </output>
54
+ </layer>
55
+ <layer id="4" name="VocabDecoder_282017" type="VocabDecoder" version="extension">
56
+ <data skip_tokens="0, 1, 32000, 32001, 32002, 32003, 32004, 32005, 32006, 32007, 32008, 32009, 32010" />
57
+ <input>
58
+ <port id="0" precision="I32">
59
+ <dim>-1</dim>
60
+ <dim>-1</dim>
61
+ </port>
62
+ <port id="1" precision="I32">
63
+ <dim>-1</dim>
64
+ </port>
65
+ <port id="2" precision="I32">
66
+ <dim>-1</dim>
67
+ </port>
68
+ <port id="3" precision="U8">
69
  <dim>-1</dim>
70
  </port>
71
  </input>
72
  <output>
73
+ <port id="4" precision="I32">
74
+ <dim>-1</dim>
75
+ </port>
76
+ <port id="5" precision="I32">
77
+ <dim>-1</dim>
78
+ </port>
79
+ <port id="6" precision="I32">
80
+ <dim>-1</dim>
81
+ </port>
82
+ <port id="7" precision="I32">
83
+ <dim>-1</dim>
84
+ </port>
85
+ <port id="8" precision="U8">
86
+ <dim>-1</dim>
87
+ </port>
88
+ </output>
89
+ </layer>
90
+ <layer id="5" name="Constant_282019" type="Const" version="opset1">
91
+ <data element_type="u8" shape="3" offset="339118" size="3" />
92
+ <output>
93
+ <port id="0" precision="U8">
94
+ <dim>3</dim>
95
+ </port>
96
+ </output>
97
+ </layer>
98
+ <layer id="6" name="Constant_282021" type="Const" version="opset1">
99
+ <data element_type="u8" shape="1" offset="339121" size="1" />
100
+ <output>
101
+ <port id="0" precision="U8">
102
+ <dim>1</dim>
103
+ </port>
104
+ </output>
105
+ </layer>
106
+ <layer id="7" name="RegexNormalization_282022" type="RegexNormalization" version="extension">
107
+ <data global_replace="true" />
108
+ <input>
109
+ <port id="0" precision="I32">
110
+ <dim>-1</dim>
111
+ </port>
112
  <port id="1" precision="I32">
113
  <dim>-1</dim>
114
+ </port>
115
+ <port id="2" precision="U8">
116
+ <dim>-1</dim>
117
+ </port>
118
+ <port id="3" precision="U8">
119
+ <dim>3</dim>
120
+ </port>
121
+ <port id="4" precision="U8">
122
+ <dim>1</dim>
123
+ </port>
124
+ </input>
125
+ <output>
126
+ <port id="5" precision="I32">
127
+ <dim>-1</dim>
128
+ </port>
129
+ <port id="6" precision="I32">
130
+ <dim>-1</dim>
131
+ </port>
132
+ <port id="7" precision="U8">
133
  <dim>-1</dim>
134
  </port>
135
  </output>
136
  </layer>
137
+ <layer id="8" name="ByteFallback_282023" type="ByteFallback" version="extension">
138
  <input>
139
+ <port id="0" precision="I32">
140
+ <dim>-1</dim>
141
  </port>
142
  <port id="1" precision="I32">
143
  <dim>-1</dim>
144
+ </port>
145
+ <port id="2" precision="U8">
146
  <dim>-1</dim>
147
  </port>
148
  </input>
149
  <output>
150
+ <port id="3" precision="I32">
151
+ <dim>-1</dim>
152
+ </port>
153
+ <port id="4" precision="I32">
154
+ <dim>-1</dim>
155
+ </port>
156
+ <port id="5" precision="U8">
157
+ <dim>-1</dim>
158
+ </port>
159
+ </output>
160
+ </layer>
161
+ <layer id="9" name="FuzeRagged_282024" type="FuzeRagged" version="extension">
162
+ <input>
163
+ <port id="0" precision="I32">
164
+ <dim>-1</dim>
165
+ </port>
166
+ <port id="1" precision="I32">
167
+ <dim>-1</dim>
168
+ </port>
169
  <port id="2" precision="I32">
170
  <dim>-1</dim>
171
  </port>
172
  <port id="3" precision="I32">
173
  <dim>-1</dim>
174
  </port>
175
+ </input>
176
+ <output>
177
+ <port id="4" precision="I32">
178
+ <dim>-1</dim>
179
+ </port>
180
+ <port id="5" precision="I32">
181
+ <dim>-1</dim>
182
+ </port>
183
+ </output>
184
+ </layer>
185
+ <layer id="10" name="Constant_282026" type="Const" version="opset1">
186
+ <data element_type="u8" shape="2" offset="339122" size="2" />
187
+ <output>
188
+ <port id="0" precision="U8">
189
+ <dim>2</dim>
190
+ </port>
191
+ </output>
192
+ </layer>
193
+ <layer id="11" name="Constant_282028" type="Const" version="opset1">
194
+ <data element_type="u8" shape="0" offset="339124" size="1" />
195
+ <output>
196
+ <port id="0" precision="U8">
197
+ <dim>0</dim>
198
+ </port>
199
+ </output>
200
+ </layer>
201
+ <layer id="12" name="RegexNormalization_282029" type="RegexNormalization" version="extension">
202
+ <data global_replace="true" />
203
+ <input>
204
+ <port id="0" precision="I32">
205
+ <dim>-1</dim>
206
+ </port>
207
+ <port id="1" precision="I32">
208
+ <dim>-1</dim>
209
+ </port>
210
+ <port id="2" precision="U8">
211
+ <dim>-1</dim>
212
+ </port>
213
+ <port id="3" precision="U8">
214
+ <dim>2</dim>
215
+ </port>
216
  <port id="4" precision="U8">
217
+ <dim>0</dim>
218
+ </port>
219
+ </input>
220
+ <output>
221
+ <port id="5" precision="I32">
222
+ <dim>-1</dim>
223
+ </port>
224
+ <port id="6" precision="I32">
225
+ <dim>-1</dim>
226
+ </port>
227
+ <port id="7" precision="U8">
228
  <dim>-1</dim>
229
  </port>
230
  </output>
231
  </layer>
232
+ <layer id="13" name="StringTensorPack_282030" type="StringTensorPack" version="extension">
233
  <data mode="begins_ends" />
234
  <input>
235
  <port id="0" precision="I32">
 
248
  </port>
249
  </output>
250
  </layer>
251
+ <layer id="14" name="Result_282031" type="Result" version="opset1">
252
  <input>
253
  <port id="0" precision="STRING">
254
  <dim>-1</dim>
 
257
  </layer>
258
  </layers>
259
  <edges>
260
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
261
+ <edge from-layer="1" from-port="1" to-layer="4" to-port="0" />
262
+ <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
263
+ <edge from-layer="3" from-port="1" to-layer="4" to-port="1" />
264
+ <edge from-layer="3" from-port="2" to-layer="4" to-port="2" />
265
+ <edge from-layer="3" from-port="3" to-layer="4" to-port="3" />
266
+ <edge from-layer="4" from-port="6" to-layer="7" to-port="0" />
267
+ <edge from-layer="4" from-port="7" to-layer="7" to-port="1" />
268
+ <edge from-layer="4" from-port="8" to-layer="7" to-port="2" />
269
+ <edge from-layer="4" from-port="5" to-layer="9" to-port="1" />
270
+ <edge from-layer="4" from-port="4" to-layer="9" to-port="0" />
271
+ <edge from-layer="5" from-port="0" to-layer="7" to-port="3" />
272
+ <edge from-layer="6" from-port="0" to-layer="7" to-port="4" />
273
+ <edge from-layer="7" from-port="7" to-layer="8" to-port="2" />
274
+ <edge from-layer="7" from-port="6" to-layer="8" to-port="1" />
275
+ <edge from-layer="7" from-port="5" to-layer="8" to-port="0" />
276
+ <edge from-layer="8" from-port="3" to-layer="9" to-port="2" />
277
+ <edge from-layer="8" from-port="4" to-layer="9" to-port="3" />
278
+ <edge from-layer="8" from-port="5" to-layer="12" to-port="2" />
279
+ <edge from-layer="9" from-port="4" to-layer="12" to-port="0" />
280
+ <edge from-layer="9" from-port="5" to-layer="12" to-port="1" />
281
+ <edge from-layer="10" from-port="0" to-layer="12" to-port="3" />
282
+ <edge from-layer="11" from-port="0" to-layer="12" to-port="4" />
283
+ <edge from-layer="12" from-port="5" to-layer="13" to-port="0" />
284
+ <edge from-layer="12" from-port="6" to-layer="13" to-port="1" />
285
+ <edge from-layer="12" from-port="7" to-layer="13" to-port="2" />
286
+ <edge from-layer="13" from-port="3" to-layer="14" to-port="0" />
287
  </edges>
288
  <rt_info>
289
  <bos_token_id value="1" />
openvino_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22291c7526fdd7b67a17f415287c6c8b473fae8fcd7afa4887e3ee436d306ea2
3
- size 2450248240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4966cb2c4f416cb20b934a7130d4aa5aefa5007b56c817b55dcfe53b1506c415
3
+ size 2151489888
openvino_model.xml CHANGED
The diff for this file is too large to render. See raw diff
 
openvino_tokenizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:996b60b8de0ee7597bb22dadabc6bed436a4255e0b2e2cbc7a11ad7ffbad8613
3
- size 500278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012a18fef2df281b8293a03448630081625d563d29d199b482a64751609a5698
3
+ size 1262043
openvino_tokenizer.xml CHANGED
@@ -1,64 +1,204 @@
1
  <?xml version="1.0"?>
2
  <net name="tokenizer" version="11">
3
  <layers>
4
- <layer id="0" name="string_input" type="Parameter" version="opset1">
5
  <data shape="?" element_type="string" />
6
  <output>
7
- <port id="0" precision="STRING" names="string_input">
8
  <dim>-1</dim>
9
  </port>
10
  </output>
11
  </layer>
12
- <layer id="1" name="Constant_16" type="Const" version="opset1">
13
- <data element_type="i32" shape="" offset="0" size="4" />
14
  <output>
15
- <port id="0" precision="I32" />
16
  </output>
17
  </layer>
18
- <layer id="2" name="Constant_1" type="Const" version="opset1">
19
- <data element_type="u8" shape="499969" offset="4" size="499969" />
 
 
 
 
 
20
  <output>
21
- <port id="0" precision="U8">
22
- <dim>499969</dim>
 
 
 
 
 
 
23
  </port>
24
  </output>
25
  </layer>
26
- <layer id="3" name="StringTensorUnpack_4" type="StringTensorUnpack" version="extension">
27
- <data mode="begins_ends" />
28
  <input>
29
- <port id="0" precision="STRING">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  <dim>-1</dim>
31
  </port>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  </input>
33
  <output>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  <port id="1" precision="I32">
35
  <dim>-1</dim>
36
  </port>
37
  <port id="2" precision="I32">
38
  <dim>-1</dim>
39
  </port>
40
- <port id="3" precision="U8">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  <dim>-1</dim>
42
  </port>
43
  </output>
44
  </layer>
45
- <layer id="4" name="Constant_6" type="Const" version="opset1">
46
- <data element_type="u8" shape="7" offset="499973" size="7" />
47
  <output>
48
  <port id="0" precision="U8">
49
  <dim>7</dim>
50
  </port>
51
  </output>
52
  </layer>
53
- <layer id="5" name="Constant_8" type="Const" version="opset1">
54
- <data element_type="u8" shape="3" offset="499980" size="3" />
55
  <output>
56
  <port id="0" precision="U8">
57
- <dim>3</dim>
58
  </port>
59
  </output>
60
  </layer>
61
- <layer id="6" name="RegexNormalization_9" type="RegexNormalization" version="extension">
62
  <data global_replace="true" />
63
  <input>
64
  <port id="0" precision="I32">
@@ -70,27 +210,49 @@
70
  <port id="2" precision="U8">
71
  <dim>-1</dim>
72
  </port>
73
- <port id="3" precision="U8">
74
- <dim>7</dim>
75
  </port>
76
  <port id="4" precision="U8">
77
- <dim>3</dim>
 
 
 
78
  </port>
79
  </input>
80
  <output>
81
- <port id="5" precision="I32">
82
  <dim>-1</dim>
83
  </port>
84
- <port id="6" precision="I32">
85
  <dim>-1</dim>
86
  </port>
87
- <port id="7" precision="U8">
 
 
 
88
  <dim>-1</dim>
89
  </port>
90
  </output>
91
  </layer>
92
- <layer id="7" name="StringTensorPack_10" type="StringTensorPack" version="extension">
93
- <data mode="begins_ends" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  <input>
95
  <port id="0" precision="I32">
96
  <dim>-1</dim>
@@ -101,26 +263,44 @@
101
  <port id="2" precision="U8">
102
  <dim>-1</dim>
103
  </port>
 
 
 
 
 
 
 
 
 
104
  </input>
105
  <output>
106
- <port id="3" precision="STRING">
 
 
 
 
 
 
 
 
 
107
  <dim>-1</dim>
108
  </port>
109
  </output>
110
  </layer>
111
- <layer id="8" name="Constant_12" type="Const" version="opset1">
112
- <data element_type="u8" shape="223" offset="499983" size="223" />
113
  <output>
114
  <port id="0" precision="U8">
115
- <dim>223</dim>
116
  </port>
117
  </output>
118
  </layer>
119
- <layer id="9" name="StringTensorUnpack_13" type="StringTensorUnpack" version="extension">
120
  <data mode="begins_ends" />
121
  <input>
122
  <port id="0" precision="U8">
123
- <dim>223</dim>
124
  </port>
125
  </input>
126
  <output>
@@ -135,89 +315,173 @@
135
  </port>
136
  </output>
137
  </layer>
138
- <layer id="10" name="Constant_14" type="Const" version="opset1">
139
- <data element_type="i32" shape="14" offset="500206" size="56" />
140
  <output>
141
- <port id="0" precision="I32">
142
- <dim>14</dim>
143
  </port>
144
  </output>
145
  </layer>
146
- <layer id="11" name="SentencepieceTokenizer_15" type="SentencepieceTokenizer" version="extension">
147
- <data nbest_size="0" alpha="0" add_bos="false" add_eos="false" reverse="true" />
148
  <input>
149
  <port id="0" precision="U8">
150
- <dim>499969</dim>
151
  </port>
152
- <port id="1" precision="STRING">
 
 
153
  <dim>-1</dim>
154
  </port>
155
  <port id="2" precision="I32">
156
  <dim>-1</dim>
157
  </port>
158
- <port id="3" precision="I32">
159
  <dim>-1</dim>
160
  </port>
161
- <port id="4" precision="U8">
162
- <dim>-1</dim>
 
 
 
 
 
163
  </port>
164
- <port id="5" precision="I32">
165
- <dim>14</dim>
 
 
 
 
 
166
  </port>
167
  </input>
168
  <output>
169
- <port id="6" precision="I64">
170
  <dim>-1</dim>
171
- <dim>2</dim>
172
  </port>
173
- <port id="7" precision="I32">
174
  <dim>-1</dim>
175
  </port>
176
- <port id="8" precision="I64">
177
- <dim>2</dim>
178
  </port>
179
  </output>
180
  </layer>
181
- <layer id="12" name="Broadcast_17" type="Broadcast" version="opset3">
182
- <data mode="numpy" />
 
 
 
 
 
 
 
 
183
  <input>
184
- <port id="0" precision="I32" />
185
- <port id="1" precision="I64">
186
- <dim>2</dim>
187
  </port>
188
  </input>
189
  <output>
 
 
 
190
  <port id="2" precision="I32">
191
  <dim>-1</dim>
 
 
192
  <dim>-1</dim>
193
  </port>
194
  </output>
195
  </layer>
196
- <layer id="13" name="Constant_18" type="Const" version="opset1">
197
- <data element_type="i32" shape="" offset="500262" size="4" />
198
  <output>
199
- <port id="0" precision="I32" />
 
 
200
  </output>
201
  </layer>
202
- <layer id="14" name="ShapeOf_19" type="ShapeOf" version="opset3">
203
- <data output_type="i64" />
204
  <input>
205
  <port id="0" precision="I32">
206
  <dim>-1</dim>
207
  </port>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  </input>
209
  <output>
210
- <port id="1" precision="I64">
211
- <dim>1</dim>
 
 
 
 
 
 
212
  </port>
213
  </output>
214
  </layer>
215
- <layer id="15" name="Broadcast_20" type="Broadcast" version="opset3">
216
- <data mode="numpy" />
217
  <input>
218
- <port id="0" precision="I32" />
219
- <port id="1" precision="I64">
220
- <dim>1</dim>
 
 
221
  </port>
222
  </input>
223
  <output>
@@ -226,137 +490,183 @@
226
  </port>
227
  </output>
228
  </layer>
229
- <layer id="16" name="ScatterNDUpdate_24" type="ScatterNDUpdate" version="opset4">
 
 
 
 
 
 
 
230
  <input>
231
  <port id="0" precision="I32">
232
  <dim>-1</dim>
 
 
 
 
 
233
  <dim>-1</dim>
234
  </port>
235
- <port id="1" precision="I64">
 
 
 
 
 
236
  <dim>-1</dim>
237
- <dim>2</dim>
238
  </port>
239
- <port id="2" precision="I32">
240
  <dim>-1</dim>
241
  </port>
242
  </input>
243
  <output>
244
- <port id="3" precision="I32">
245
- <dim>-1</dim>
246
  <dim>-1</dim>
247
  </port>
248
  </output>
249
  </layer>
250
- <layer id="17" name="Constant_28" type="Const" version="opset1">
251
- <data element_type="i64" shape="1" offset="500266" size="8" />
252
  <output>
253
- <port id="0" precision="I64">
254
  <dim>1</dim>
255
  </port>
256
  </output>
257
  </layer>
258
- <layer id="18" name="Reverse_29" type="Reverse" version="opset1">
259
- <data mode="index" />
260
  <input>
261
  <port id="0" precision="I32">
262
  <dim>-1</dim>
 
 
263
  <dim>-1</dim>
264
  </port>
265
- <port id="1" precision="I64">
 
 
 
266
  <dim>1</dim>
267
  </port>
268
  </input>
269
  <output>
270
- <port id="2" precision="I32">
 
 
 
 
 
 
 
 
 
271
  <dim>-1</dim>
 
 
 
 
 
272
  <dim>-1</dim>
273
  </port>
274
  </output>
275
  </layer>
276
- <layer id="19" name="Reverse_29" type="Convert" version="opset1">
277
- <data destination_type="i64" />
278
  <input>
279
  <port id="0" precision="I32">
280
  <dim>-1</dim>
 
 
281
  <dim>-1</dim>
282
  </port>
283
  </input>
284
  <output>
285
- <port id="1" precision="I64" names="attention_mask">
286
- <dim>-1</dim>
287
  <dim>-1</dim>
288
  </port>
289
  </output>
290
  </layer>
291
- <layer id="21" name="Constant_25" type="Const" version="opset1">
292
- <data element_type="i32" shape="" offset="500274" size="4" />
293
  <output>
294
  <port id="0" precision="I32" />
295
  </output>
296
  </layer>
297
- <layer id="22" name="Broadcast_26" type="Broadcast" version="opset3">
298
- <data mode="bidirectional" />
299
  <input>
300
- <port id="0" precision="I32" />
301
- <port id="1" precision="I64">
302
- <dim>2</dim>
303
  </port>
 
304
  </input>
305
  <output>
306
- <port id="2" precision="I32">
307
- <dim>-1</dim>
308
- <dim>-1</dim>
309
- </port>
 
 
 
310
  </output>
311
  </layer>
312
- <layer id="23" name="ScatterNDUpdate_27" type="ScatterNDUpdate" version="opset4">
 
313
  <input>
314
  <port id="0" precision="I32">
315
  <dim>-1</dim>
316
- <dim>-1</dim>
317
  </port>
318
- <port id="1" precision="I64">
319
  <dim>-1</dim>
320
- <dim>2</dim>
321
  </port>
322
  <port id="2" precision="I32">
323
  <dim>-1</dim>
324
  </port>
 
 
325
  </input>
326
  <output>
327
- <port id="3" precision="I32">
 
 
 
 
328
  <dim>-1</dim>
329
  <dim>-1</dim>
330
  </port>
331
  </output>
332
  </layer>
333
- <layer id="24" name="Constant_30" type="Const" version="opset1">
334
- <data element_type="i64" shape="1" offset="500266" size="8" />
 
 
 
 
 
 
335
  <output>
336
- <port id="0" precision="I64">
337
- <dim>1</dim>
 
338
  </port>
339
  </output>
340
  </layer>
341
- <layer id="25" name="Reverse_31" type="Reverse" version="opset1">
342
- <data mode="index" />
343
  <input>
344
  <port id="0" precision="I32">
345
  <dim>-1</dim>
346
  <dim>-1</dim>
347
  </port>
348
- <port id="1" precision="I64">
349
- <dim>1</dim>
350
- </port>
351
  </input>
352
  <output>
353
- <port id="2" precision="I32">
354
  <dim>-1</dim>
355
  <dim>-1</dim>
356
  </port>
357
  </output>
358
  </layer>
359
- <layer id="26" name="Reverse_31" type="Convert" version="opset1">
360
  <data destination_type="i64" />
361
  <input>
362
  <port id="0" precision="I32">
@@ -371,7 +681,7 @@
371
  </port>
372
  </output>
373
  </layer>
374
- <layer id="27" name="Result_32" type="Result" version="opset1">
375
  <input>
376
  <port id="0" precision="I64">
377
  <dim>-1</dim>
@@ -379,7 +689,7 @@
379
  </port>
380
  </input>
381
  </layer>
382
- <layer id="20" name="Result_33" type="Result" version="opset1">
383
  <input>
384
  <port id="0" precision="I64">
385
  <dim>-1</dim>
@@ -389,43 +699,83 @@
389
  </layer>
390
  </layers>
391
  <edges>
392
- <edge from-layer="0" from-port="0" to-layer="3" to-port="0" />
393
- <edge from-layer="1" from-port="0" to-layer="12" to-port="0" />
394
- <edge from-layer="2" from-port="0" to-layer="11" to-port="0" />
 
 
 
395
  <edge from-layer="3" from-port="1" to-layer="6" to-port="0" />
396
- <edge from-layer="3" from-port="2" to-layer="6" to-port="1" />
397
- <edge from-layer="3" from-port="3" to-layer="6" to-port="2" />
398
- <edge from-layer="4" from-port="0" to-layer="6" to-port="3" />
399
- <edge from-layer="5" from-port="0" to-layer="6" to-port="4" />
400
- <edge from-layer="6" from-port="5" to-layer="7" to-port="0" />
401
- <edge from-layer="6" from-port="6" to-layer="7" to-port="1" />
402
- <edge from-layer="6" from-port="7" to-layer="7" to-port="2" />
403
- <edge from-layer="7" from-port="3" to-layer="11" to-port="1" />
404
- <edge from-layer="8" from-port="0" to-layer="9" to-port="0" />
405
- <edge from-layer="9" from-port="1" to-layer="11" to-port="2" />
406
- <edge from-layer="9" from-port="2" to-layer="11" to-port="3" />
407
- <edge from-layer="9" from-port="3" to-layer="11" to-port="4" />
408
- <edge from-layer="10" from-port="0" to-layer="11" to-port="5" />
409
- <edge from-layer="11" from-port="7" to-layer="14" to-port="0" />
410
- <edge from-layer="11" from-port="7" to-layer="23" to-port="2" />
411
- <edge from-layer="11" from-port="6" to-layer="23" to-port="1" />
412
- <edge from-layer="11" from-port="8" to-layer="22" to-port="1" />
413
- <edge from-layer="11" from-port="6" to-layer="16" to-port="1" />
414
- <edge from-layer="11" from-port="8" to-layer="12" to-port="1" />
415
- <edge from-layer="12" from-port="2" to-layer="16" to-port="0" />
416
- <edge from-layer="13" from-port="0" to-layer="15" to-port="0" />
417
- <edge from-layer="14" from-port="1" to-layer="15" to-port="1" />
418
- <edge from-layer="15" from-port="2" to-layer="16" to-port="2" />
419
- <edge from-layer="16" from-port="3" to-layer="18" to-port="0" />
420
- <edge from-layer="17" from-port="0" to-layer="18" to-port="1" />
421
- <edge from-layer="18" from-port="2" to-layer="19" to-port="0" />
422
- <edge from-layer="19" from-port="1" to-layer="20" to-port="0" />
423
- <edge from-layer="21" from-port="0" to-layer="22" to-port="0" />
424
- <edge from-layer="22" from-port="2" to-layer="23" to-port="0" />
425
- <edge from-layer="23" from-port="3" to-layer="25" to-port="0" />
426
- <edge from-layer="24" from-port="0" to-layer="25" to-port="1" />
427
- <edge from-layer="25" from-port="2" to-layer="26" to-port="0" />
428
- <edge from-layer="26" from-port="1" to-layer="27" to-port="0" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  </edges>
430
  <rt_info>
431
  <bos_token_id value="1" />
 
1
  <?xml version="1.0"?>
2
  <net name="tokenizer" version="11">
3
  <layers>
4
+ <layer id="0" name="Parameter_281898" type="Parameter" version="opset1">
5
  <data shape="?" element_type="string" />
6
  <output>
7
+ <port id="0" precision="STRING" names="Parameter_281898">
8
  <dim>-1</dim>
9
  </port>
10
  </output>
11
  </layer>
12
+ <layer id="1" name="Constant_281904" type="Const" version="opset1">
13
+ <data element_type="i64" shape="" offset="0" size="8" />
14
  <output>
15
+ <port id="0" precision="I64" />
16
  </output>
17
  </layer>
18
+ <layer id="2" name="StringTensorUnpack_281899" type="StringTensorUnpack" version="extension">
19
+ <data mode="begins_ends" />
20
+ <input>
21
+ <port id="0" precision="STRING">
22
+ <dim>-1</dim>
23
+ </port>
24
+ </input>
25
  <output>
26
+ <port id="1" precision="I32">
27
+ <dim>-1</dim>
28
+ </port>
29
+ <port id="2" precision="I32">
30
+ <dim>-1</dim>
31
+ </port>
32
+ <port id="3" precision="U8">
33
+ <dim>-1</dim>
34
  </port>
35
  </output>
36
  </layer>
37
+ <layer id="3" name="ShapeOf_281900" type="ShapeOf" version="opset3">
38
+ <data output_type="i64" />
39
  <input>
40
+ <port id="0" precision="I32">
41
+ <dim>-1</dim>
42
+ </port>
43
+ </input>
44
+ <output>
45
+ <port id="1" precision="I64">
46
+ <dim>1</dim>
47
+ </port>
48
+ </output>
49
+ </layer>
50
+ <layer id="4" name="Constant_281901" type="Const" version="opset1">
51
+ <data element_type="i64" shape="" offset="0" size="8" />
52
+ <output>
53
+ <port id="0" precision="I64" />
54
+ </output>
55
+ </layer>
56
+ <layer id="5" name="Constant_281902" type="Const" version="opset1">
57
+ <data element_type="i64" shape="" offset="0" size="8" />
58
+ <output>
59
+ <port id="0" precision="I64" />
60
+ </output>
61
+ </layer>
62
+ <layer id="6" name="Gather_281903" type="Gather" version="opset8">
63
+ <data batch_dims="0" />
64
+ <input>
65
+ <port id="0" precision="I64">
66
+ <dim>1</dim>
67
+ </port>
68
+ <port id="1" precision="I64" />
69
+ <port id="2" precision="I64" />
70
+ </input>
71
+ <output>
72
+ <port id="3" precision="I64" />
73
+ </output>
74
+ </layer>
75
+ <layer id="7" name="Constant_281905" type="Const" version="opset1">
76
+ <data element_type="i64" shape="" offset="8" size="8" />
77
+ <output>
78
+ <port id="0" precision="I64" />
79
+ </output>
80
+ </layer>
81
+ <layer id="8" name="Range_281906" type="Range" version="opset4">
82
+ <data output_type="i32" />
83
+ <input>
84
+ <port id="0" precision="I64" />
85
+ <port id="1" precision="I64" />
86
+ <port id="2" precision="I64" />
87
+ </input>
88
+ <output>
89
+ <port id="3" precision="I32">
90
  <dim>-1</dim>
91
  </port>
92
+ </output>
93
+ </layer>
94
+ <layer id="9" name="Constant_281907" type="Const" version="opset1">
95
+ <data element_type="i64" shape="" offset="8" size="8" />
96
+ <output>
97
+ <port id="0" precision="I64" />
98
+ </output>
99
+ </layer>
100
+ <layer id="10" name="Constant_281908" type="Const" version="opset1">
101
+ <data element_type="i64" shape="" offset="8" size="8" />
102
+ <output>
103
+ <port id="0" precision="I64" />
104
+ </output>
105
+ </layer>
106
+ <layer id="11" name="Add_281909" type="Add" version="opset1">
107
+ <data auto_broadcast="numpy" />
108
+ <input>
109
+ <port id="0" precision="I64" />
110
+ <port id="1" precision="I64" />
111
  </input>
112
  <output>
113
+ <port id="2" precision="I64" />
114
+ </output>
115
+ </layer>
116
+ <layer id="12" name="Constant_281910" type="Const" version="opset1">
117
+ <data element_type="i64" shape="" offset="8" size="8" />
118
+ <output>
119
+ <port id="0" precision="I64" />
120
+ </output>
121
+ </layer>
122
+ <layer id="13" name="Range_281911" type="Range" version="opset4">
123
+ <data output_type="i32" />
124
+ <input>
125
+ <port id="0" precision="I64" />
126
+ <port id="1" precision="I64" />
127
+ <port id="2" precision="I64" />
128
+ </input>
129
+ <output>
130
+ <port id="3" precision="I32">
131
+ <dim>-1</dim>
132
+ </port>
133
+ </output>
134
+ </layer>
135
+ <layer id="14" name="Constant_281973" type="Const" version="opset1">
136
+ <data element_type="u8" shape="328" offset="16" size="328" />
137
+ <output>
138
+ <port id="0" precision="U8">
139
+ <dim>328</dim>
140
+ </port>
141
+ </output>
142
+ </layer>
143
+ <layer id="15" name="SpecialTokensSplit_281974" type="SpecialTokensSplit" version="extension">
144
+ <input>
145
+ <port id="0" precision="I32">
146
+ <dim>-1</dim>
147
+ </port>
148
  <port id="1" precision="I32">
149
  <dim>-1</dim>
150
  </port>
151
  <port id="2" precision="I32">
152
  <dim>-1</dim>
153
  </port>
154
+ <port id="3" precision="I32">
155
+ <dim>-1</dim>
156
+ </port>
157
+ <port id="4" precision="U8">
158
+ <dim>-1</dim>
159
+ </port>
160
+ <port id="5" precision="U8">
161
+ <dim>328</dim>
162
+ </port>
163
+ </input>
164
+ <output>
165
+ <port id="6" precision="I32">
166
+ <dim>-1</dim>
167
+ </port>
168
+ <port id="7" precision="I32">
169
+ <dim>-1</dim>
170
+ </port>
171
+ <port id="8" precision="I32">
172
+ <dim>-1</dim>
173
+ </port>
174
+ <port id="9" precision="I32">
175
+ <dim>-1</dim>
176
+ </port>
177
+ <port id="10" precision="U8">
178
+ <dim>-1</dim>
179
+ </port>
180
+ <port id="11" precision="BOOL">
181
  <dim>-1</dim>
182
  </port>
183
  </output>
184
  </layer>
185
+ <layer id="16" name="Constant_281976" type="Const" version="opset1">
186
+ <data element_type="u8" shape="7" offset="344" size="7" />
187
  <output>
188
  <port id="0" precision="U8">
189
  <dim>7</dim>
190
  </port>
191
  </output>
192
  </layer>
193
+ <layer id="17" name="Constant_281978" type="Const" version="opset1">
194
+ <data element_type="u8" shape="5" offset="351" size="5" />
195
  <output>
196
  <port id="0" precision="U8">
197
+ <dim>5</dim>
198
  </port>
199
  </output>
200
  </layer>
201
+ <layer id="18" name="RegexNormalization_281979" type="RegexNormalization" version="extension">
202
  <data global_replace="true" />
203
  <input>
204
  <port id="0" precision="I32">
 
210
  <port id="2" precision="U8">
211
  <dim>-1</dim>
212
  </port>
213
+ <port id="3" precision="BOOL">
214
+ <dim>-1</dim>
215
  </port>
216
  <port id="4" precision="U8">
217
+ <dim>7</dim>
218
+ </port>
219
+ <port id="5" precision="U8">
220
+ <dim>5</dim>
221
  </port>
222
  </input>
223
  <output>
224
+ <port id="6" precision="I32">
225
  <dim>-1</dim>
226
  </port>
227
+ <port id="7" precision="I32">
228
  <dim>-1</dim>
229
  </port>
230
+ <port id="8" precision="U8">
231
+ <dim>-1</dim>
232
+ </port>
233
+ <port id="9" precision="BOOL">
234
  <dim>-1</dim>
235
  </port>
236
  </output>
237
  </layer>
238
+ <layer id="19" name="Constant_281981" type="Const" version="opset1">
239
+ <data element_type="u8" shape="1" offset="356" size="1" />
240
+ <output>
241
+ <port id="0" precision="U8">
242
+ <dim>1</dim>
243
+ </port>
244
+ </output>
245
+ </layer>
246
+ <layer id="20" name="Constant_281983" type="Const" version="opset1">
247
+ <data element_type="u8" shape="3" offset="357" size="3" />
248
+ <output>
249
+ <port id="0" precision="U8">
250
+ <dim>3</dim>
251
+ </port>
252
+ </output>
253
+ </layer>
254
+ <layer id="21" name="RegexNormalization_281984" type="RegexNormalization" version="extension">
255
+ <data global_replace="true" />
256
  <input>
257
  <port id="0" precision="I32">
258
  <dim>-1</dim>
 
263
  <port id="2" precision="U8">
264
  <dim>-1</dim>
265
  </port>
266
+ <port id="3" precision="BOOL">
267
+ <dim>-1</dim>
268
+ </port>
269
+ <port id="4" precision="U8">
270
+ <dim>1</dim>
271
+ </port>
272
+ <port id="5" precision="U8">
273
+ <dim>3</dim>
274
+ </port>
275
  </input>
276
  <output>
277
+ <port id="6" precision="I32">
278
+ <dim>-1</dim>
279
+ </port>
280
+ <port id="7" precision="I32">
281
+ <dim>-1</dim>
282
+ </port>
283
+ <port id="8" precision="U8">
284
+ <dim>-1</dim>
285
+ </port>
286
+ <port id="9" precision="BOOL">
287
  <dim>-1</dim>
288
  </port>
289
  </output>
290
  </layer>
291
+ <layer id="22" name="Constant_281986" type="Const" version="opset1">
292
+ <data element_type="u8" shape="339118" offset="360" size="339118" />
293
  <output>
294
  <port id="0" precision="U8">
295
+ <dim>339118</dim>
296
  </port>
297
  </output>
298
  </layer>
299
+ <layer id="23" name="StringTensorUnpack_281987" type="StringTensorUnpack" version="extension">
300
  <data mode="begins_ends" />
301
  <input>
302
  <port id="0" precision="U8">
303
+ <dim>339118</dim>
304
  </port>
305
  </input>
306
  <output>
 
315
  </port>
316
  </output>
317
  </layer>
318
+ <layer id="24" name="Constant_281992" type="Const" version="opset1">
319
+ <data element_type="u8" shape="499127" offset="339478" size="499127" />
320
  <output>
321
+ <port id="0" precision="U8">
322
+ <dim>499127</dim>
323
  </port>
324
  </output>
325
  </layer>
326
+ <layer id="25" name="StringTensorUnpack_281993" type="StringTensorUnpack" version="extension">
327
+ <data mode="begins_ends" />
328
  <input>
329
  <port id="0" precision="U8">
330
+ <dim>499127</dim>
331
  </port>
332
+ </input>
333
+ <output>
334
+ <port id="1" precision="I32">
335
  <dim>-1</dim>
336
  </port>
337
  <port id="2" precision="I32">
338
  <dim>-1</dim>
339
  </port>
340
+ <port id="3" precision="U8">
341
  <dim>-1</dim>
342
  </port>
343
+ </output>
344
+ </layer>
345
+ <layer id="26" name="Constant_281995" type="Const" version="opset1">
346
+ <data element_type="u8" shape="412810" offset="838605" size="412810" />
347
+ <output>
348
+ <port id="0" precision="U8">
349
+ <dim>412810</dim>
350
  </port>
351
+ </output>
352
+ </layer>
353
+ <layer id="27" name="StringTensorUnpack_281996" type="StringTensorUnpack" version="extension">
354
+ <data mode="begins_ends" />
355
+ <input>
356
+ <port id="0" precision="U8">
357
+ <dim>412810</dim>
358
  </port>
359
  </input>
360
  <output>
361
+ <port id="1" precision="I32">
362
  <dim>-1</dim>
 
363
  </port>
364
+ <port id="2" precision="I32">
365
  <dim>-1</dim>
366
  </port>
367
+ <port id="3" precision="U8">
368
+ <dim>-1</dim>
369
  </port>
370
  </output>
371
  </layer>
372
+ <layer id="28" name="Constant_281989" type="Const" version="opset1">
373
+ <data element_type="u8" shape="8716" offset="1251415" size="8716" />
374
+ <output>
375
+ <port id="0" precision="U8">
376
+ <dim>8716</dim>
377
+ </port>
378
+ </output>
379
+ </layer>
380
+ <layer id="29" name="StringTensorUnpack_281990" type="StringTensorUnpack" version="extension">
381
+ <data mode="begins_ends" />
382
  <input>
383
+ <port id="0" precision="U8">
384
+ <dim>8716</dim>
 
385
  </port>
386
  </input>
387
  <output>
388
+ <port id="1" precision="I32">
389
+ <dim>-1</dim>
390
+ </port>
391
  <port id="2" precision="I32">
392
  <dim>-1</dim>
393
+ </port>
394
+ <port id="3" precision="U8">
395
  <dim>-1</dim>
396
  </port>
397
  </output>
398
  </layer>
399
+ <layer id="30" name="Constant_281997" type="Const" version="opset1">
400
+ <data element_type="i32" shape="475" offset="1260131" size="1900" />
401
  <output>
402
+ <port id="0" precision="I32">
403
+ <dim>475</dim>
404
+ </port>
405
  </output>
406
  </layer>
407
+ <layer id="31" name="BPETokenizer_281998" type="BPETokenizer" version="extension">
408
+ <data unk_token="&lt;unk>" fuse_unk="true" suffix_indicator="" end_suffix="" byte_fallback="true" cache_capacity="20000" />
409
  <input>
410
  <port id="0" precision="I32">
411
  <dim>-1</dim>
412
  </port>
413
+ <port id="1" precision="I32">
414
+ <dim>-1</dim>
415
+ </port>
416
+ <port id="2" precision="I32">
417
+ <dim>-1</dim>
418
+ </port>
419
+ <port id="3" precision="I32">
420
+ <dim>-1</dim>
421
+ </port>
422
+ <port id="4" precision="U8">
423
+ <dim>-1</dim>
424
+ </port>
425
+ <port id="5" precision="I32">
426
+ <dim>-1</dim>
427
+ </port>
428
+ <port id="6" precision="I32">
429
+ <dim>-1</dim>
430
+ </port>
431
+ <port id="7" precision="U8">
432
+ <dim>-1</dim>
433
+ </port>
434
+ <port id="8" precision="I32">
435
+ <dim>-1</dim>
436
+ </port>
437
+ <port id="9" precision="I32">
438
+ <dim>-1</dim>
439
+ </port>
440
+ <port id="10" precision="U8">
441
+ <dim>-1</dim>
442
+ </port>
443
+ <port id="11" precision="I32">
444
+ <dim>-1</dim>
445
+ </port>
446
+ <port id="12" precision="I32">
447
+ <dim>-1</dim>
448
+ </port>
449
+ <port id="13" precision="U8">
450
+ <dim>-1</dim>
451
+ </port>
452
+ <port id="14" precision="I32">
453
+ <dim>-1</dim>
454
+ </port>
455
+ <port id="15" precision="I32">
456
+ <dim>-1</dim>
457
+ </port>
458
+ <port id="16" precision="U8">
459
+ <dim>-1</dim>
460
+ </port>
461
+ <port id="17" precision="I32">
462
+ <dim>475</dim>
463
+ </port>
464
  </input>
465
  <output>
466
+ <port id="18" precision="I32">
467
+ <dim>-1</dim>
468
+ </port>
469
+ <port id="19" precision="I32">
470
+ <dim>-1</dim>
471
+ </port>
472
+ <port id="20" precision="I32">
473
+ <dim>-1</dim>
474
  </port>
475
  </output>
476
  </layer>
477
+ <layer id="32" name="Subtract_281999" type="Subtract" version="opset1">
478
+ <data auto_broadcast="numpy" />
479
  <input>
480
+ <port id="0" precision="I32">
481
+ <dim>-1</dim>
482
+ </port>
483
+ <port id="1" precision="I32">
484
+ <dim>-1</dim>
485
  </port>
486
  </input>
487
  <output>
 
490
  </port>
491
  </output>
492
  </layer>
493
+ <layer id="33" name="Constant_282000" type="Const" version="opset1">
494
+ <data element_type="i32" shape="" offset="1262031" size="4" />
495
+ <output>
496
+ <port id="0" precision="I32" />
497
+ </output>
498
+ </layer>
499
+ <layer id="34" name="Minimum_282001" type="Minimum" version="opset1">
500
+ <data auto_broadcast="numpy" />
501
  <input>
502
  <port id="0" precision="I32">
503
  <dim>-1</dim>
504
+ </port>
505
+ <port id="1" precision="I32" />
506
+ </input>
507
+ <output>
508
+ <port id="2" precision="I32">
509
  <dim>-1</dim>
510
  </port>
511
+ </output>
512
+ </layer>
513
+ <layer id="35" name="Subtract_282002" type="Subtract" version="opset1">
514
+ <data auto_broadcast="numpy" />
515
+ <input>
516
+ <port id="0" precision="I32">
517
  <dim>-1</dim>
 
518
  </port>
519
+ <port id="1" precision="I32">
520
  <dim>-1</dim>
521
  </port>
522
  </input>
523
  <output>
524
+ <port id="2" precision="I32">
 
525
  <dim>-1</dim>
526
  </port>
527
  </output>
528
  </layer>
529
+ <layer id="36" name="Constant_282003" type="Const" version="opset1">
530
+ <data element_type="i32" shape="1" offset="1262035" size="4" />
531
  <output>
532
+ <port id="0" precision="I32">
533
  <dim>1</dim>
534
  </port>
535
  </output>
536
  </layer>
537
+ <layer id="37" name="CombineSegments_282004" type="CombineSegments" version="extension">
 
538
  <input>
539
  <port id="0" precision="I32">
540
  <dim>-1</dim>
541
+ </port>
542
+ <port id="1" precision="I32">
543
  <dim>-1</dim>
544
  </port>
545
+ <port id="2" precision="I32">
546
+ <dim>-1</dim>
547
+ </port>
548
+ <port id="3" precision="I32">
549
  <dim>1</dim>
550
  </port>
551
  </input>
552
  <output>
553
+ <port id="4" precision="I32">
554
+ <dim>-1</dim>
555
+ </port>
556
+ <port id="5" precision="I32">
557
+ <dim>-1</dim>
558
+ </port>
559
+ <port id="6" precision="I32">
560
+ <dim>-1</dim>
561
+ </port>
562
+ <port id="7" precision="I32">
563
  <dim>-1</dim>
564
+ </port>
565
+ <port id="8" precision="I32">
566
+ <dim>-1</dim>
567
+ </port>
568
+ <port id="9" precision="I32">
569
  <dim>-1</dim>
570
  </port>
571
  </output>
572
  </layer>
573
+ <layer id="38" name="Subtract_282005" type="Subtract" version="opset1">
574
+ <data auto_broadcast="numpy" />
575
  <input>
576
  <port id="0" precision="I32">
577
  <dim>-1</dim>
578
+ </port>
579
+ <port id="1" precision="I32">
580
  <dim>-1</dim>
581
  </port>
582
  </input>
583
  <output>
584
+ <port id="2" precision="I32">
 
585
  <dim>-1</dim>
586
  </port>
587
  </output>
588
  </layer>
589
+ <layer id="39" name="Constant_282006" type="Const" version="opset1">
590
+ <data element_type="i32" shape="" offset="1262035" size="4" />
591
  <output>
592
  <port id="0" precision="I32" />
593
  </output>
594
  </layer>
595
+ <layer id="40" name="ReduceMax_282007" type="ReduceMax" version="opset1">
596
+ <data keep_dims="false" />
597
  <input>
598
+ <port id="0" precision="I32">
599
+ <dim>-1</dim>
 
600
  </port>
601
+ <port id="1" precision="I32" />
602
  </input>
603
  <output>
604
+ <port id="2" precision="I32" />
605
+ </output>
606
+ </layer>
607
+ <layer id="41" name="Constant_282008" type="Const" version="opset1">
608
+ <data element_type="i32" shape="" offset="1262039" size="4" />
609
+ <output>
610
+ <port id="0" precision="I32" />
611
  </output>
612
  </layer>
613
+ <layer id="42" name="RaggedToDense_282009" type="RaggedToDense" version="extension">
614
+ <data pad_right="false" />
615
  <input>
616
  <port id="0" precision="I32">
617
  <dim>-1</dim>
 
618
  </port>
619
+ <port id="1" precision="I32">
620
  <dim>-1</dim>
 
621
  </port>
622
  <port id="2" precision="I32">
623
  <dim>-1</dim>
624
  </port>
625
+ <port id="3" precision="I32" />
626
+ <port id="4" precision="I32" />
627
  </input>
628
  <output>
629
+ <port id="5" precision="I32">
630
+ <dim>-1</dim>
631
+ <dim>-1</dim>
632
+ </port>
633
+ <port id="6" precision="BOOL">
634
  <dim>-1</dim>
635
  <dim>-1</dim>
636
  </port>
637
  </output>
638
  </layer>
639
+ <layer id="43" name="Convert_282010" type="Convert" version="opset1">
640
+ <data destination_type="i32" />
641
+ <input>
642
+ <port id="0" precision="BOOL">
643
+ <dim>-1</dim>
644
+ <dim>-1</dim>
645
+ </port>
646
+ </input>
647
  <output>
648
+ <port id="1" precision="I32">
649
+ <dim>-1</dim>
650
+ <dim>-1</dim>
651
  </port>
652
  </output>
653
  </layer>
654
+ <layer id="44" name="Convert_282010" type="Convert" version="opset1">
655
+ <data destination_type="i64" />
656
  <input>
657
  <port id="0" precision="I32">
658
  <dim>-1</dim>
659
  <dim>-1</dim>
660
  </port>
 
 
 
661
  </input>
662
  <output>
663
+ <port id="1" precision="I64" names="attention_mask">
664
  <dim>-1</dim>
665
  <dim>-1</dim>
666
  </port>
667
  </output>
668
  </layer>
669
+ <layer id="46" name="RaggedToDense_282009.0" type="Convert" version="opset1">
670
  <data destination_type="i64" />
671
  <input>
672
  <port id="0" precision="I32">
 
681
  </port>
682
  </output>
683
  </layer>
684
+ <layer id="47" name="Result_282013" type="Result" version="opset1">
685
  <input>
686
  <port id="0" precision="I64">
687
  <dim>-1</dim>
 
689
  </port>
690
  </input>
691
  </layer>
692
+ <layer id="45" name="Result_282015" type="Result" version="opset1">
693
  <input>
694
  <port id="0" precision="I64">
695
  <dim>-1</dim>
 
699
  </layer>
700
  </layers>
701
  <edges>
702
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
703
+ <edge from-layer="1" from-port="0" to-layer="8" to-port="0" />
704
+ <edge from-layer="2" from-port="1" to-layer="3" to-port="0" />
705
+ <edge from-layer="2" from-port="3" to-layer="15" to-port="4" />
706
+ <edge from-layer="2" from-port="2" to-layer="15" to-port="3" />
707
+ <edge from-layer="2" from-port="1" to-layer="15" to-port="2" />
708
  <edge from-layer="3" from-port="1" to-layer="6" to-port="0" />
709
+ <edge from-layer="4" from-port="0" to-layer="6" to-port="1" />
710
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="2" />
711
+ <edge from-layer="6" from-port="3" to-layer="8" to-port="1" />
712
+ <edge from-layer="6" from-port="3" to-layer="11" to-port="0" />
713
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
714
+ <edge from-layer="8" from-port="3" to-layer="15" to-port="0" />
715
+ <edge from-layer="9" from-port="0" to-layer="13" to-port="0" />
716
+ <edge from-layer="10" from-port="0" to-layer="11" to-port="1" />
717
+ <edge from-layer="11" from-port="2" to-layer="13" to-port="1" />
718
+ <edge from-layer="12" from-port="0" to-layer="13" to-port="2" />
719
+ <edge from-layer="13" from-port="3" to-layer="15" to-port="1" />
720
+ <edge from-layer="14" from-port="0" to-layer="15" to-port="5" />
721
+ <edge from-layer="15" from-port="9" to-layer="18" to-port="1" />
722
+ <edge from-layer="15" from-port="6" to-layer="31" to-port="0" />
723
+ <edge from-layer="15" from-port="7" to-layer="31" to-port="1" />
724
+ <edge from-layer="15" from-port="11" to-layer="18" to-port="3" />
725
+ <edge from-layer="15" from-port="10" to-layer="18" to-port="2" />
726
+ <edge from-layer="15" from-port="8" to-layer="18" to-port="0" />
727
+ <edge from-layer="16" from-port="0" to-layer="18" to-port="4" />
728
+ <edge from-layer="17" from-port="0" to-layer="18" to-port="5" />
729
+ <edge from-layer="18" from-port="6" to-layer="21" to-port="0" />
730
+ <edge from-layer="18" from-port="7" to-layer="21" to-port="1" />
731
+ <edge from-layer="18" from-port="8" to-layer="21" to-port="2" />
732
+ <edge from-layer="18" from-port="9" to-layer="21" to-port="3" />
733
+ <edge from-layer="19" from-port="0" to-layer="21" to-port="4" />
734
+ <edge from-layer="20" from-port="0" to-layer="21" to-port="5" />
735
+ <edge from-layer="21" from-port="8" to-layer="31" to-port="4" />
736
+ <edge from-layer="21" from-port="7" to-layer="31" to-port="3" />
737
+ <edge from-layer="21" from-port="6" to-layer="31" to-port="2" />
738
+ <edge from-layer="22" from-port="0" to-layer="23" to-port="0" />
739
+ <edge from-layer="23" from-port="1" to-layer="31" to-port="5" />
740
+ <edge from-layer="23" from-port="2" to-layer="31" to-port="6" />
741
+ <edge from-layer="23" from-port="3" to-layer="31" to-port="7" />
742
+ <edge from-layer="24" from-port="0" to-layer="25" to-port="0" />
743
+ <edge from-layer="25" from-port="1" to-layer="31" to-port="8" />
744
+ <edge from-layer="25" from-port="2" to-layer="31" to-port="9" />
745
+ <edge from-layer="25" from-port="3" to-layer="31" to-port="10" />
746
+ <edge from-layer="26" from-port="0" to-layer="27" to-port="0" />
747
+ <edge from-layer="27" from-port="3" to-layer="31" to-port="13" />
748
+ <edge from-layer="27" from-port="2" to-layer="31" to-port="12" />
749
+ <edge from-layer="27" from-port="1" to-layer="31" to-port="11" />
750
+ <edge from-layer="28" from-port="0" to-layer="29" to-port="0" />
751
+ <edge from-layer="29" from-port="1" to-layer="31" to-port="14" />
752
+ <edge from-layer="29" from-port="2" to-layer="31" to-port="15" />
753
+ <edge from-layer="29" from-port="3" to-layer="31" to-port="16" />
754
+ <edge from-layer="30" from-port="0" to-layer="31" to-port="17" />
755
+ <edge from-layer="31" from-port="19" to-layer="32" to-port="0" />
756
+ <edge from-layer="31" from-port="18" to-layer="32" to-port="1" />
757
+ <edge from-layer="31" from-port="19" to-layer="35" to-port="0" />
758
+ <edge from-layer="31" from-port="20" to-layer="37" to-port="2" />
759
+ <edge from-layer="31" from-port="19" to-layer="37" to-port="1" />
760
+ <edge from-layer="32" from-port="2" to-layer="34" to-port="0" />
761
+ <edge from-layer="33" from-port="0" to-layer="34" to-port="1" />
762
+ <edge from-layer="34" from-port="2" to-layer="35" to-port="1" />
763
+ <edge from-layer="35" from-port="2" to-layer="37" to-port="0" />
764
+ <edge from-layer="36" from-port="0" to-layer="37" to-port="3" />
765
+ <edge from-layer="37" from-port="5" to-layer="42" to-port="1" />
766
+ <edge from-layer="37" from-port="6" to-layer="42" to-port="2" />
767
+ <edge from-layer="37" from-port="4" to-layer="42" to-port="0" />
768
+ <edge from-layer="37" from-port="4" to-layer="38" to-port="1" />
769
+ <edge from-layer="37" from-port="5" to-layer="38" to-port="0" />
770
+ <edge from-layer="38" from-port="2" to-layer="40" to-port="0" />
771
+ <edge from-layer="39" from-port="0" to-layer="40" to-port="1" />
772
+ <edge from-layer="40" from-port="2" to-layer="42" to-port="3" />
773
+ <edge from-layer="41" from-port="0" to-layer="42" to-port="4" />
774
+ <edge from-layer="42" from-port="6" to-layer="43" to-port="0" />
775
+ <edge from-layer="42" from-port="5" to-layer="46" to-port="0" />
776
+ <edge from-layer="43" from-port="1" to-layer="44" to-port="0" />
777
+ <edge from-layer="44" from-port="1" to-layer="45" to-port="0" />
778
+ <edge from-layer="46" from-port="1" to-layer="47" to-port="0" />
779
  </edges>
780
  <rt_info>
781
  <bos_token_id value="1" />
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff