Llama-3.2-3B-Instruct-ov-INT8 / openvino_detokenizer.xml
srang992's picture
initial commit
441cb3b verified
<?xml version="1.0"?>
<net name="detokenizer" version="11">
<layers>
<!-- Layer 0: graph input. A rank-2 i64 tensor whose two dimensions are both
     dynamic (shape "?,?"). Presumably token IDs laid out as [batch, sequence];
     confirm against the caller. -->
<layer id="0" name="Parameter_212552" type="Parameter" version="opset1">
  <data shape="?,?" element_type="i64" />
  <output>
    <port id="0" precision="I64" names="Parameter_212552">
      <dim>-1</dim>
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 1: element-type conversion of the rank-2 input from I64 to I32
     (destination_type="i32"). The shape stays fully dynamic. -->
<layer id="1" name="Convert_212568" type="Convert" version="opset1">
  <data destination_type="i32" />
  <input>
    <port id="0" precision="I64">
      <dim>-1</dim>
      <dim>-1</dim>
    </port>
  </input>
  <output>
    <port id="1" precision="I32">
      <dim>-1</dim>
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 2: constant of 1586436 u8 elements, read from offset 0 of the
     external constant data (that data is not part of this file). It is the only
     input of the StringTensorUnpack layer; presumably the packed vocabulary
     strings, to be confirmed. -->
<layer id="2" name="Constant_212447" type="Const" version="opset1">
  <data element_type="u8" shape="1586436" offset="0" size="1586436" />
  <output>
    <port id="0" precision="U8">
      <dim>1586436</dim>
    </port>
  </output>
</layer>
<!-- Layer 3: extension op that takes the 1586436-element U8 constant and emits
     three 1-D tensors of dynamic length: two I32 (ports 1 and 2) and one U8
     (port 3). With mode="begins_ends" these are presumably per-string begin
     offsets, end offsets and the shared character buffer; confirm against the
     extension's documentation. -->
<layer id="3" name="StringTensorUnpack_212448" type="StringTensorUnpack" version="extension">
  <data mode="begins_ends" />
  <input>
    <port id="0" precision="U8">
      <dim>1586436</dim>
    </port>
  </input>
  <output>
    <port id="1" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="2" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="3" precision="U8">
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 4: extension op VocabDecoder.
     Inputs: port 0 is the rank-2 I32 tensor produced by the Convert layer;
     ports 1 to 3 are the three outputs of StringTensorUnpack (I32, I32, U8).
     Outputs: four 1-D I32 tensors (ports 4 to 7) and one 1-D U8 tensor (port 8),
     all of dynamic length.
     skip_tokens lists the contiguous ID range 128000 to 128255, which includes
     the bos_token_id (128000) and eos_token_id (128009) recorded in rt_info.
     Presumably these IDs are left out of the decoded text; confirm against the
     extension's documentation.
     The attribute value is kept on a single line: a raw line break inside an
     attribute value is turned into an extra space by XML attribute-value
     normalization, which would silently alter the list text. -->
<layer id="4" name="VocabDecoder_212553" type="VocabDecoder" version="extension">
  <data skip_tokens="128000, 128001, 128002, 128003, 128004, 128005, 128006, 128007, 128008, 128009, 128010, 128011, 128012, 128013, 128014, 128015, 128016, 128017, 128018, 128019, 128020, 128021, 128022, 128023, 128024, 128025, 128026, 128027, 128028, 128029, 128030, 128031, 128032, 128033, 128034, 128035, 128036, 128037, 128038, 128039, 128040, 128041, 128042, 128043, 128044, 128045, 128046, 128047, 128048, 128049, 128050, 128051, 128052, 128053, 128054, 128055, 128056, 128057, 128058, 128059, 128060, 128061, 128062, 128063, 128064, 128065, 128066, 128067, 128068, 128069, 128070, 128071, 128072, 128073, 128074, 128075, 128076, 128077, 128078, 128079, 128080, 128081, 128082, 128083, 128084, 128085, 128086, 128087, 128088, 128089, 128090, 128091, 128092, 128093, 128094, 128095, 128096, 128097, 128098, 128099, 128100, 128101, 128102, 128103, 128104, 128105, 128106, 128107, 128108, 128109, 128110, 128111, 128112, 128113, 128114, 128115, 128116, 128117, 128118, 128119, 128120, 128121, 128122, 128123, 128124, 128125, 128126, 128127, 128128, 128129, 128130, 128131, 128132, 128133, 128134, 128135, 128136, 128137, 128138, 128139, 128140, 128141, 128142, 128143, 128144, 128145, 128146, 128147, 128148, 128149, 128150, 128151, 128152, 128153, 128154, 128155, 128156, 128157, 128158, 128159, 128160, 128161, 128162, 128163, 128164, 128165, 128166, 128167, 128168, 128169, 128170, 128171, 128172, 128173, 128174, 128175, 128176, 128177, 128178, 128179, 128180, 128181, 128182, 128183, 128184, 128185, 128186, 128187, 128188, 128189, 128190, 128191, 128192, 128193, 128194, 128195, 128196, 128197, 128198, 128199, 128200, 128201, 128202, 128203, 128204, 128205, 128206, 128207, 128208, 128209, 128210, 128211, 128212, 128213, 128214, 128215, 128216, 128217, 128218, 128219, 128220, 128221, 128222, 128223, 128224, 128225, 128226, 128227, 128228, 128229, 128230, 128231, 128232, 128233, 128234, 128235, 128236, 128237, 128238, 128239, 128240, 128241, 128242, 128243, 128244, 128245, 128246, 128247, 128248, 128249, 128250, 128251, 128252, 128253, 128254, 128255" />
  <input>
    <port id="0" precision="I32">
      <dim>-1</dim>
      <dim>-1</dim>
    </port>
    <port id="1" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="2" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="3" precision="U8">
      <dim>-1</dim>
    </port>
  </input>
  <output>
    <port id="4" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="5" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="6" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="7" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="8" precision="U8">
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 5: extension op CharsToBytes. Consumes all five VocabDecoder outputs
     (four I32 tensors and one U8 tensor) and produces two 1-D I32 tensors
     (ports 5 and 6) plus one 1-D U8 tensor (port 7). The layer carries no
     attributes. -->
<layer id="5" name="CharsToBytes_212554" type="CharsToBytes" version="extension">
  <input>
    <port id="0" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="1" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="2" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="3" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="4" precision="U8">
      <dim>-1</dim>
    </port>
  </input>
  <output>
    <port id="5" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="6" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="7" precision="U8">
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 6: 47-element u8 constant stored at offset 1586436 of the external
     constant data, i.e. directly after the 1586436-byte constant of layer 2.
     Wired to RegexNormalization input port 3; presumably the search pattern,
     to be confirmed. -->
<layer id="6" name="Constant_212556" type="Const" version="opset1">
  <data element_type="u8" shape="47" offset="1586436" size="47" />
  <output>
    <port id="0" precision="U8">
      <dim>47</dim>
    </port>
  </output>
</layer>
<!-- Layer 7: 2-element u8 constant stored at offset 1586483 of the external
     constant data (1586436 + 47, so it follows the constant of layer 6).
     Wired to RegexNormalization input port 4; presumably the replacement
     text, to be confirmed. -->
<layer id="7" name="Constant_212558" type="Const" version="opset1">
  <data element_type="u8" shape="2" offset="1586483" size="2" />
  <output>
    <port id="0" precision="U8">
      <dim>2</dim>
    </port>
  </output>
</layer>
<!-- Layer 8: extension op RegexNormalization with global_replace="true".
     Inputs: ports 0 to 2 take the three CharsToBytes outputs (I32, I32, U8);
     port 3 takes the 47-byte constant and port 4 the 2-byte constant.
     Outputs: the same triple layout again (I32, I32, U8 on ports 5 to 7),
     each 1-D with dynamic length. -->
<layer id="8" name="RegexNormalization_212559" type="RegexNormalization" version="extension">
  <data global_replace="true" />
  <input>
    <port id="0" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="1" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="2" precision="U8">
      <dim>-1</dim>
    </port>
    <port id="3" precision="U8">
      <dim>47</dim>
    </port>
    <port id="4" precision="U8">
      <dim>2</dim>
    </port>
  </input>
  <output>
    <port id="5" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="6" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="7" precision="U8">
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 9: extension op StringTensorPack in mode="begins_ends". Combines the
     three RegexNormalization outputs (I32, I32, U8) into a single 1-D tensor of
     precision STRING, exposed under the tensor name "string_output". -->
<layer id="9" name="StringTensorPack_212560" type="StringTensorPack" version="extension">
  <data mode="begins_ends" />
  <input>
    <port id="0" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="1" precision="I32">
      <dim>-1</dim>
    </port>
    <port id="2" precision="U8">
      <dim>-1</dim>
    </port>
  </input>
  <output>
    <port id="3" precision="STRING" names="string_output">
      <dim>-1</dim>
    </port>
  </output>
</layer>
<!-- Layer 10: graph output. Receives the 1-D STRING tensor produced by
     StringTensorPack. -->
<layer id="10" name="Result_212561" type="Result" version="opset1">
  <input>
    <port id="0" precision="STRING">
      <dim>-1</dim>
    </port>
  </input>
</layer>
</layers>
<!-- Connections between layer ports. Each edge links an output port of
     from-layer to an input port of to-layer; the element order of the
     original list is kept unchanged. -->
<edges>
  <!-- Input path: Parameter (0) to Convert (1) to VocabDecoder (4) port 0. -->
  <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
  <edge from-layer="1" from-port="1" to-layer="4" to-port="0" />
  <!-- Large constant (2) is unpacked by StringTensorUnpack (3), whose three
       outputs feed VocabDecoder (4) ports 1 to 3. -->
  <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
  <edge from-layer="3" from-port="1" to-layer="4" to-port="1" />
  <edge from-layer="3" from-port="2" to-layer="4" to-port="2" />
  <edge from-layer="3" from-port="3" to-layer="4" to-port="3" />
  <!-- All five VocabDecoder (4) outputs go to CharsToBytes (5); output port N
       maps to input port N minus 4. -->
  <edge from-layer="4" from-port="8" to-layer="5" to-port="4" />
  <edge from-layer="4" from-port="7" to-layer="5" to-port="3" />
  <edge from-layer="4" from-port="6" to-layer="5" to-port="2" />
  <edge from-layer="4" from-port="5" to-layer="5" to-port="1" />
  <edge from-layer="4" from-port="4" to-layer="5" to-port="0" />
  <!-- CharsToBytes (5) outputs plus the two small constants (6 and 7) feed
       RegexNormalization (8). -->
  <edge from-layer="5" from-port="5" to-layer="8" to-port="0" />
  <edge from-layer="5" from-port="6" to-layer="8" to-port="1" />
  <edge from-layer="5" from-port="7" to-layer="8" to-port="2" />
  <edge from-layer="6" from-port="0" to-layer="8" to-port="3" />
  <edge from-layer="7" from-port="0" to-layer="8" to-port="4" />
  <!-- RegexNormalization (8) outputs are packed by StringTensorPack (9), whose
       STRING output goes to the Result layer (10). -->
  <edge from-layer="8" from-port="5" to-layer="9" to-port="0" />
  <edge from-layer="8" from-port="6" to-layer="9" to-port="1" />
  <edge from-layer="8" from-port="7" to-layer="9" to-port="2" />
  <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
</edges>
<!-- Metadata stored alongside the graph. Every entry is an element with a
     single "value" attribute. -->
<rt_info>
<!-- Beginning-of-sequence token ID. 128000 is also the first entry of the
     VocabDecoder skip_tokens list. -->
<bos_token_id value="128000" />
<!-- Chat prompt template, stored as one attribute value. It uses Jinja-style
     expression, statement and comment delimiters; &#10; is the character
     reference for a line feed and &quot; / &lt; are escaped quote and less-than
     characters. NOTE(review): the value also contains two raw line breaks
     (each right after "its value}.' "). XML attribute-value normalization
     turns a raw line break into a space, so they do not become line feeds in
     the template; they look like wrapping artifacts, to be confirmed against
     the generator's output. -->
<chat_template value="{{- bos_token }}&#10;{%- if custom_tools is defined %}&#10; {%- set tools = custom_tools %}&#10;{%- endif %}&#10;{%- if not tools_in_user_message is defined %}&#10; {%- set tools_in_user_message = true %}&#10;{%- endif %}&#10;{%- if not date_string is defined %}&#10; {%- if strftime_now is defined %}&#10; {%- set date_string = strftime_now(&quot;%d %b %Y&quot;) %}&#10; {%- else %}&#10; {%- set date_string = &quot;26 Jul 2024&quot; %}&#10; {%- endif %}&#10;{%- endif %}&#10;{%- if not tools is defined %}&#10; {%- set tools = none %}&#10;{%- endif %}&#10;&#10;{#- This block extracts the system message, so we can slot it into the right place. #}&#10;{%- if messages[0]['role'] == 'system' %}&#10; {%- set system_message = messages[0]['content']|trim %}&#10; {%- set messages = messages[1:] %}&#10;{%- else %}&#10; {%- set system_message = &quot;&quot; %}&#10;{%- endif %}&#10;&#10;{#- System message #}&#10;{{- &quot;&lt;|start_header_id|>system&lt;|end_header_id|>\n\n&quot; }}&#10;{%- if tools is not none %}&#10; {{- &quot;Environment: ipython\n&quot; }}&#10;{%- endif %}&#10;{{- &quot;Cutting Knowledge Date: December 2023\n&quot; }}&#10;{{- &quot;Today Date: &quot; + date_string + &quot;\n\n&quot; }}&#10;{%- if tools is not none and not tools_in_user_message %}&#10; {{- &quot;You have access to the following functions. To call a function, please respond with JSON for a function call.&quot; }}&#10; {{- 'Respond in the format {&quot;name&quot;: function name, &quot;parameters&quot;: dictionary of argument name and its value}.' 
}}&#10; {{- &quot;Do not use variables.\n\n&quot; }}&#10; {%- for t in tools %}&#10; {{- t | tojson(indent=4) }}&#10; {{- &quot;\n\n&quot; }}&#10; {%- endfor %}&#10;{%- endif %}&#10;{{- system_message }}&#10;{{- &quot;&lt;|eot_id|>&quot; }}&#10;&#10;{#- Custom tools are passed in a user message with some extra guidance #}&#10;{%- if tools_in_user_message and not tools is none %}&#10; {#- Extract the first user message so we can plug it in here #}&#10; {%- if messages | length != 0 %}&#10; {%- set first_user_message = messages[0]['content']|trim %}&#10; {%- set messages = messages[1:] %}&#10; {%- else %}&#10; {{- raise_exception(&quot;Cannot put tools in the first user message when there's no first user message!&quot;) }}&#10;{%- endif %}&#10; {{- '&lt;|start_header_id|>user&lt;|end_header_id|>\n\n' -}}&#10; {{- &quot;Given the following functions, please respond with a JSON for a function call &quot; }}&#10; {{- &quot;with its proper arguments that best answers the given prompt.\n\n&quot; }}&#10; {{- 'Respond in the format {&quot;name&quot;: function name, &quot;parameters&quot;: dictionary of argument name and its value}.' 
}}&#10; {{- &quot;Do not use variables.\n\n&quot; }}&#10; {%- for t in tools %}&#10; {{- t | tojson(indent=4) }}&#10; {{- &quot;\n\n&quot; }}&#10; {%- endfor %}&#10; {{- first_user_message + &quot;&lt;|eot_id|>&quot;}}&#10;{%- endif %}&#10;&#10;{%- for message in messages %}&#10; {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}&#10; {{- '&lt;|start_header_id|>' + message['role'] + '&lt;|end_header_id|>\n\n'+ message['content'] | trim + '&lt;|eot_id|>' }}&#10; {%- elif 'tool_calls' in message %}&#10; {%- if not message.tool_calls|length == 1 %}&#10; {{- raise_exception(&quot;This model only supports single tool-calls at once!&quot;) }}&#10; {%- endif %}&#10; {%- set tool_call = message.tool_calls[0].function %}&#10; {{- '&lt;|start_header_id|>assistant&lt;|end_header_id|>\n\n' -}}&#10; {{- '{&quot;name&quot;: &quot;' + tool_call.name + '&quot;, ' }}&#10; {{- '&quot;parameters&quot;: ' }}&#10; {{- tool_call.arguments | tojson }}&#10; {{- &quot;}&quot; }}&#10; {{- &quot;&lt;|eot_id|>&quot; }}&#10; {%- elif message.role == &quot;tool&quot; or message.role == &quot;ipython&quot; %}&#10; {{- &quot;&lt;|start_header_id|>ipython&lt;|end_header_id|>\n\n&quot; }}&#10; {%- if message.content is mapping or message.content is iterable %}&#10; {{- message.content | tojson }}&#10; {%- else %}&#10; {{- message.content }}&#10; {%- endif %}&#10; {{- &quot;&lt;|eot_id|>&quot; }}&#10; {%- endif %}&#10;{%- endfor %}&#10;{%- if add_generation_prompt %}&#10; {{- '&lt;|start_header_id|>assistant&lt;|end_header_id|>\n\n' }}&#10;{%- endif %}&#10;" />
<!-- End-of-sequence token ID. 128009 also appears in the VocabDecoder
     skip_tokens list. -->
<eos_token_id value="128009" />
<!-- String form of the tokenizer class recorded for this graph; presumably
     the class the graph was converted from, to be confirmed. -->
<original_tokenizer_class value="&lt;class 'transformers.tokenization_utils_fast.PreTrainedTokenizerFast'>" />
</rt_info>
</net>