{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import os\n",
"import json\n",
"from typing import List, Union\n",
"from pprint import pprint\n",
"import torch\n",
"import inspect\n",
"from collections import defaultdict\n",
"from transformers import AutoTokenizer\n",
"from agentstudio.agentstudio_utils import *\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Global Variables"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['xlam_7b_r', 'xlam_8x7b_r', 'xlam_8x22b_r']\n"
]
}
],
"source": [
"def get_current_weather(location: str, format: str, date: int):\n",
" \"\"\"\n",
" Get the current weather\n",
"\n",
" Args:\n",
" location: The city and state, e.g. San Francisco, CA\n",
" format: The temperature unit to use. Infer this from the users location. (choices: [\"celsius\", \"fahrenheit\"])\n",
" date: a specific date to get the weather for\n",
" \"\"\"\n",
" pass\n",
"\n",
"def get_current_temperature(location: str, unit: str, longitude: float, latitude: float) -> float:\n",
" \"\"\"\n",
" Get the current temperature at a location.\n",
" \n",
" Args:\n",
" location: The location to get the temperature for, in the format \"City, Country\"\n",
" unit: The unit to return the temperature in. (choices: [\"celsius\", \"fahrenheit\"])\n",
" longitude: the longitude of the location\n",
" latitude: the latitude of the location\n",
" Returns:\n",
" The current temperature at the specified location in the specified units, as a float.\n",
" \"\"\"\n",
" return 22. # A real function should probably actually get the temperature!\n",
"\n",
"def get_current_wind_speed(location: str) -> float:\n",
" \"\"\"\n",
" Get the current wind speed in km/h at a given location.\n",
" \n",
" Args:\n",
" location: The location to get the temperature for, in the format \"City, Country\"\n",
" Returns:\n",
" The current wind speed at the given location in km/h, as a float.\n",
" \"\"\"\n",
" return 6. # A real function should probably actually get the wind speed!\n",
"\n",
"tools_simple_str = \"\"\"\n",
"def get_current_weather(location: str, format: str, date: int):\n",
" '''\n",
" Get the current weather\n",
"\n",
" Args:\n",
" location: The city and state, e.g. San Francisco, CA\n",
" format: The temperature unit to use. Infer this from the users location. (choices: [\"celsius\", \"fahrenheit\"])\n",
" date: a specific date to get the weather for\n",
" '''\n",
" pass\n",
"\n",
"def get_current_temperature(location: str, unit: str, longitude: float, latitude: float) -> float:\n",
" '''\n",
" Get the current temperature at a location.\n",
" \n",
" Args:\n",
" location: The location to get the temperature for, in the format \"City, Country\"\n",
" unit: The unit to return the temperature in. (choices: [\"celsius\", \"fahrenheit\"])\n",
" longitude: the longitude of the location\n",
" latitude: the latitude of the location\n",
" Returns:\n",
" The current temperature at the specified location in the specified units, as a float.\n",
" '''\n",
" return 22. # A real function should probably actually get the temperature!\n",
"\n",
"def get_current_wind_speed(location: str) -> float:\n",
" '''\n",
" Get the current wind speed in km/h at a given location.\n",
" \n",
" Args:\n",
" location: The location to get the temperature for, in the format \"City, Country\"\n",
" Returns:\n",
" The current wind speed at the given location in km/h, as a float.\n",
" '''\n",
" return 6. # A real function should probably actually get the wind speed!\n",
"\"\"\"\n",
"\n",
"tools = [get_current_weather, get_current_temperature, get_current_wind_speed]\n",
"tools_str = json.dumps([{\"name\": tool.__name__, \"parameters\": str(inspect.signature(tool)), \"description\": tool.__doc__.strip()} for tool in tools], indent=4)\n",
"\n",
"BASE_MODELS_DIR = \"/export/agentstudio-family/checkpoints/\"\n",
"BASE_XLAM_DIR = f\"{BASE_MODELS_DIR}/xlam_v1\"\n",
"\n",
"models_mappping = {\n",
" # # \"mistralai/Mistral-7B-Instruct-v0.2\": \"mistral_7b_instruct_v0.2\", # NOT YET UPDATED for the \"system\" location in tokenizer\n",
" # \"mistralai/Mistral-7B-Instruct-v0.3\": \"mistral_7b_instruct_v0.3\",\n",
" # \"mistralai/Mixtral-8x7B-Instruct-v0.1\": \"mixtral_8x7b_instruct_v0.1\",\n",
" # \"mistralai/Mixtral-8x22B-Instruct-v0.1\": \"mixtral_8x22b_instruct_v0.1\",\n",
" # \"mistralai/Mistral-Small-Instruct-2409\": \"mistral_small_instruct_2409\",\n",
" # \"mistralai/Mistral-Nemo-Instruct-2407\": \"mistral_nemo_instruct_2407\",\n",
" # \"mistralai/Mistral-Large-Instruct-2407\": \"mistral_large_instruct_2407\",\n",
" \"Salesforce/xLAM-7b-r\": \"xlam_7b_r\",\n",
" \"Salesforce/xLAM-8x7b-r\": \"xlam_8x7b_r\",\n",
" \"Salesforce/xLAM-8x22b-r\": \"xlam_8x22b_r\",\n",
" }\n",
"\n",
"model_list = list(models_mappping.values())\n",
"print(model_list)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### DO NOT RUN THIS CELL as JIANGUO has updated for xLAM 1.0 Series (xLAM Tool Call Template)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"xlam_chat_template = \"\"\"\n",
"{%- if messages[0][\"role\"] == \"system\" %}\n",
" {%- set system_message = messages[0][\"content\"] %}\n",
" {%- if messages[1][\"role\"] == \"system\" %}\n",
" {%- set format_message = messages[1][\"content\"] %}\n",
" {%- set loop_messages = messages[2:] %}\n",
" {%- else %}\n",
" {%- set loop_messages = messages[1:] %}\n",
" {%- endif %}\n",
"{%- else %}\n",
" {%- set loop_messages = messages %}\n",
"{%- endif %}\n",
"{%- if not tools is defined %}\n",
" {%- set tools = none %}\n",
"{%- endif %}\n",
"\n",
"{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n",
"{%- set ns = namespace() %}\n",
"{%- set ns.index = 0 %}\n",
"{%- for message in loop_messages %}\n",
" {%- if ((message[\"role\"] == \"user\") and (ns.index % 2 != 0)) or (message[\"role\"] not in [\"user\", \"assistant\", \"tool\", \"tool_results\"]) %}\n",
" {{- raise_exception(\"After the optional system message, conversation roles can only be from user/assistant/tool; After each tool message, the next message must be from the assistant\") }}\n",
" {%- endif %}\n",
" {%- set ns.index = ns.index + 1 %}\n",
"{%- endfor %}\n",
"\n",
"{{- bos_token }}\n",
"{{- \" [INST]\" }}\n",
"{%- if system_message is not defined %}\n",
" {% set system_message %}\n",
"You are an expert in composing functions. You are given a question and a set of possible functions. \n",
"Based on the question, you will need to make one or more function/tool calls to achieve the purpose. \n",
"If none of the functions can be used, point it out and refuse to answer. \n",
"If the given question lacks the parameters required by the function, also point it out.{% endset %}\n",
"{%- endif %}\n",
"{{- \"\\n[BEGIN OF TASK INSTRUCTION]\\n\" + system_message + \"\\n[END OF TASK INSTRUCTION]\\n\\n\" }}\n",
"\n",
"{%- if tools is not none %}\n",
" {{- \"[BEGIN OF AVAILABLE_TOOLS]\\n\" }}\n",
" {{- tools|string }}\n",
" {{- \"\\n[END OF AVAILABLE_TOOLS]\\n\\n\" }}\n",
"{%- endif %}\n",
" \n",
"{%- if format_message is not defined %}\n",
" {% set format_message %}\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
" {% endset %}\n",
"{%- endif %}\n",
"{{- \"[BEGIN OF FORMAT INSTRUCTION]\\n\" + format_message + \"[END OF FORMAT INSTRUCTION]\\n\\n\" }}\n",
"\n",
"{%- if loop_messages[0][\"role\"] == \"user\" %}\n",
" {%- set query = loop_messages[0][\"content\"] %}\n",
" {%- set remaining_messages = loop_messages[1:] %}\n",
" {{- \"[BEGIN OF QUERY]\\n\" + query + \"\\n[END OF QUERY]\" }}\n",
" {%- if remaining_messages|length == 0 %}\n",
" {{- \"\\n\" }}\n",
" {%- endif %}\n",
" {%- set loop_messages = remaining_messages %}\n",
"{%- endif %}\n",
"\n",
"{% if loop_messages %}\n",
" {{- \"\\n[BEGIN OF HISTORY STEPS]\\n[\" }}\n",
" {%- set step_id = namespace(value=1) %}\n",
" {%- for message in loop_messages %}\n",
" {%- if message[\"role\"] == \"assistant\" %}\n",
" {%- if message.tool_calls is defined and message.tool_calls is not none %}\n",
" {% if message.tool_calls is iterable and (message.tool_calls is not string and message.tool_calls is not mapping) %}\n",
" {{- \"{'thought':\" + message.content|tojson + \", 'tool_calls':\" + message.tool_calls|tojson + \", 'step_id':\" + step_id.value|string + \",\" }}\n",
" {%- else %}\n",
" {{- raise_exception(\"The tool_calls must be a list!\") }}\n",
" {%- endif %}\n",
" {%- else %}\n",
" {{- \"{'thought':\" + message.content|tojson + \", 'tool_calls':[]\" + \", 'step_id':\" + step_id.value|string + \",\" }}\n",
" {%- endif %}\n",
" {%- if loop.nextitem is not defined %}\n",
" {{- \" 'next_observation':''}\" }}\n",
" {%- elif loop.nextitem[\"role\"] == \"user\" %}\n",
" {{- \" 'next_observation':''\" }}\n",
" {%- elif loop.nextitem[\"role\"] != \"tool_results\" and loop.nextitem[\"role\"] != \"tool\" %}\n",
" {{- \" 'next_observation':''},\" }}\n",
" {%- endif %}\n",
" {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n",
" {{- \" 'next_observation':\" + message.content|tojson }}\n",
" {%- if loop.nextitem is defined and loop.nextitem[\"role\"] != \"user\" %}\n",
" {{- \"}, \" }}\n",
" {%- elif not loop.nextitem is defined %}\n",
" {{- \"} \" }}\n",
" {%- endif %}\n",
" {%- set step_id.value = step_id.value + 1 %}\n",
" {%- elif message[\"role\"] == \"user\" %}\n",
" {{- \", 'user_input':\" + message.content|tojson }}\n",
" {%- if loop.nextitem is defined %}\n",
" {{- \"}, \" }}\n",
" {%- else %}\n",
" {{- \"} \" }}\n",
" {%- endif %}\n",
" {%- set step_id.value = step_id.value + 1 %}\n",
" {%- endif %}\n",
" {%- endfor %}\n",
" {{- \"]\\n[END OF HISTORY STEPS]\\n\\n\"}}\n",
"{%- endif %}\n",
"{{- \" [/INST]\" }}\n",
"\"\"\".strip()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### DO NOT RUN THIS CELL as JIANGUO has updated for xLAM 1.0 Series(Update Tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tokenizer is already updated for xLAM 1.0 series: /export/agentstudio-family/checkpoints//xlam_v1/xlam_7b_r. Contact ❤❤❤Jianguo Zhang❤❤❤ for more details! \n"
]
}
],
"source": [
"def check_tokenizer(model_list, BASE_XLAM_DIR):\n",
" for checkpoint in model_list:\n",
" tokenize_config = open_json(os.path.join(BASE_XLAM_DIR, checkpoint, \"tokenizer_config.json\"))\n",
" \n",
" del tokenize_config[\"chat_template\"]\n",
" original_tokenize_config = open_json(os.path.join(BASE_XLAM_DIR, checkpoint, \"original_tokenizer_config.json\"))\n",
" del original_tokenize_config[\"chat_template\"]\n",
" if tokenize_config != original_tokenize_config:\n",
" raise ValueError(f\"Tokenizer config is not the same for {checkpoint}\")\n",
"\n",
"def update_tokenizer(model_list, BASE_XLAM_DIR, xlam_chat_template):\n",
" \"\"\"\n",
" Chat_templates: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md\n",
" We update Mixtral-8x22b-inst-v0.1 to better handle the system prompt in https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1/discussions/54\n",
" \"\"\"\n",
" for checkpoint in model_list:\n",
" if os.path.exists(os.path.join(BASE_XLAM_DIR, checkpoint)):\n",
" print(\"Tokenizer is already updated for xLAM 1.0 series: {}. Contact ❤❤❤Jianguo Zhang❤❤❤ for more details! \".format(os.path.join(BASE_XLAM_DIR, checkpoint)))\n",
" return \n",
" \n",
" checkpoint = os.path.join(BASE_XLAM_DIR, checkpoint)\n",
" \n",
" # For Mixtral-8x22b-inst-v0.1, move the \"SYSTEM\" prompt to the beginning to improve conversation flow. \n",
" # Originally, it appeared toward the end of the prompt, disrupting natural interactions.\n",
" # Original template: \"[INST] USER[/INST][TOOL_CALLS] ASSISTANT TOOL CALLS[TOOL_RESULTS] OBSERVATIONS[/TOOL_RESULTS] USER[/INST] ASSISTANT[AVAILABLE_TOOLS] LIST OF TOOLS[/AVAILABLE_TOOLS][INST] SYSTEM PROMPT\\n\\nUSER[/INST]\"\"\n",
" if \"8x22b\" in checkpoint:\n",
" original_tokenize_config = open_json(os.path.join(checkpoint, \"original_tokenizer_config_fixed.json\"))\n",
" else:\n",
" original_tokenize_config = open_json(os.path.join(checkpoint, \"original_tokenizer_config.json\"))\n",
" chat_template = {\n",
" \"default\": original_tokenize_config[\"chat_template\"],\n",
" \"tool_use\": xlam_chat_template\n",
" }\n",
" \n",
" original_tokenize_config[\"chat_template\"] = chat_template\n",
" save_json(os.path.join(checkpoint, \"tokenizer_config.json\"), original_tokenize_config)\n",
" print(f\"Updated tokenizer for {checkpoint}\")\n",
"\n",
"update_tokenizer(model_list, BASE_XLAM_DIR, xlam_chat_template)\n",
"check_tokenizer(model_list, BASE_XLAM_DIR)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Verify the tokenizer"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def verify_tokenizer(messages, model_list, tools, BASE_MODELS_DIR, fc_modes, file_suffix):\n",
" for fc_mode in fc_modes:\n",
" xlam_models_prompt = defaultdict(str)\n",
"\n",
" for checkpoint in model_list: \n",
" if \"xlam_\" in checkpoint:\n",
" checkpoint = os.path.join(BASE_MODELS_DIR, \"xlam_v1\", checkpoint)\n",
" elif any(keyword in checkpoint for keyword in [\"mistral\", \"mixtral\"]):\n",
" checkpoint = os.path.join(BASE_MODELS_DIR, \"mistral\", \"raw\", checkpoint)\n",
" tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
" \n",
" if fc_mode:\n",
" model_input = tokenizer.apply_chat_template(\n",
" messages,\n",
" tools=tools,\n",
" tokenize=False\n",
" )\n",
" else:\n",
" model_input = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=False\n",
" )\n",
" \n",
" if \"xlam_8x22b_r\" in checkpoint:\n",
" print(\"*\"*100)\n",
" print(\"Checkpoint: \", checkpoint)\n",
" # print(\"Chat Template: \", tokenizer.chat_template)\n",
" \n",
" print(\"-\" * 100)\n",
" print(\"FC Mode: \", fc_mode)\n",
" print(\"-\" * 100)\n",
" print(model_input)\n",
" xlam_models_prompt[checkpoint] = model_input\n",
" \n",
" # save the final message for reference\n",
" xlam_models_prompt[\"messages\"] = messages\n",
" if fc_mode:\n",
" xlam_models_prompt[\"avaliable_tools\"] = tools_str\n",
" xlam_models_prompt[\"avaliable_tools-easy_to_read\"] = tools_simple_str\n",
" \n",
" if fc_mode:\n",
" save_json(f\"xlam_chat_template-function_call_mode-{file_suffix}.json\", xlam_models_prompt)\n",
" else:\n",
" save_json(f\"xlam_chat_template-non_function_call_mode-{file_suffix}.json\", xlam_models_prompt)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 1 (General Chat without a System Message at the Beginning)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: True\n",
"----------------------------------------------------------------------------------------------------\n",
" [INST]\n",
"[BEGIN OF TASK INSTRUCTION]\n",
"You are an expert in composing functions. You are given a question and a set of possible functions. \n",
"Based on the question, you will need to make one or more function/tool calls to achieve the purpose. \n",
"If none of the functions can be used, point it out and refuse to answer. \n",
"If the given question lacks the parameters required by the function, also point it out.\n",
"[END OF TASK INSTRUCTION]\n",
"\n",
"[BEGIN OF AVAILABLE_TOOLS]\n",
"[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n",
"[END OF AVAILABLE_TOOLS]\n",
"\n",
"[BEGIN OF FORMAT INSTRUCTION]\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
"[END OF FORMAT INSTRUCTION]\n",
"\n",
"[BEGIN OF QUERY]\n",
"My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?\n",
"[END OF QUERY]\n",
"\n",
"[BEGIN OF HISTORY STEPS]\n",
"[{'thought':\"Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?\", 'tool_calls':[], 'step_id':1, 'next_observation':'', 'user_input':\"Yes, we'd love that. We prefer sunny days with warm temperatures.\"}, {'thought':\"Got it. Let me check the weather forecast for Los Angeles to help you find the best date.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"Sounds great! Thanks for your help.\"} ]\n",
"[END OF HISTORY STEPS]\n",
"\n",
" [/INST]\n",
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: False\n",
"----------------------------------------------------------------------------------------------------\n",
"[INST] My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip? [/INST] Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?[INST] Yes, we'd love that. We prefer sunny days with warm temperatures. [/INST] Got it. Let me check the weather forecast for Los Angeles to help you find the best date.[INST] Sounds great! Thanks for your help. [/INST]\n"
]
}
],
"source": [
"messages = [\n",
" {\"role\": \"user\", \"content\": \"My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?\"},\n",
" \n",
" {\"role\": \"assistant\", \"content\": \"Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?\"},\n",
" \n",
" {\"role\": \"user\", \"content\": \"Yes, we'd love that. We prefer sunny days with warm temperatures.\"},\n",
" \n",
" {\"role\": \"assistant\", \"content\": \"Got it. Let me check the weather forecast for Los Angeles to help you find the best date.\"}, # example 1\n",
"\n",
" {\"role\": \"user\", \"content\": \"Sounds great! Thanks for your help.\"},\n",
"]\n",
"\n",
"verify_tokenizer(messages, model_list, tools=tools, BASE_MODELS_DIR=BASE_MODELS_DIR, fc_modes=[True, False], file_suffix=\"example_1\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 2 (Function Call Where Tool_Calls is a List, i.e., Similar to OpenAI Requirements)"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: True\n",
"----------------------------------------------------------------------------------------------------\n",
" [INST]\n",
"[BEGIN OF TASK INSTRUCTION]\n",
"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\n",
"[END OF TASK INSTRUCTION]\n",
"\n",
"[BEGIN OF AVAILABLE_TOOLS]\n",
"[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n",
"[END OF AVAILABLE_TOOLS]\n",
"\n",
"[BEGIN OF FORMAT INSTRUCTION]\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
"[END OF FORMAT INSTRUCTION]\n",
"\n",
"[BEGIN OF QUERY]\n",
"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\n",
"[END OF QUERY]\n",
"\n",
"[BEGIN OF HISTORY STEPS]\n",
"[{'thought':\"\", 'tool_calls':[{\"name\": \"cancel_booking\", \"arguments\": {\"booking_id\": \"YGA123\"}}], 'step_id':1, 'next_observation':{\"data\": {\"class_name\": \"Yoga for Relaxation\", \"class_time\": \"2039-03-10 17:00\", \"instructor_name\": \"Maggie Zhu\", \"cancellation_time\": \"2039-03-09 19:10\"}}}, {'thought':\"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"10:00\"}}], 'step_id':3, 'next_observation':{\"data\": [{\"name\": \"Yoga for Relaxation\", \"instructor\": \"Maggie Zhu\", \"time\": \"2039-03-10 10:00\", \"duration\": 60}, {\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 10:30\", \"duration\": 30}]}}, {'thought':\"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\", 'tool_calls':[], 'step_id':4, 'next_observation':'', 'user_input':\"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"08:00\"}}], 'step_id':5, 'next_observation':{\"data\": [{\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 08:00\", \"duration\": 30}]}} ]\n",
"[END OF HISTORY STEPS]\n",
"\n",
" [/INST]\n"
]
}
],
"source": [
"messages_2 = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"cancel_booking\",\n",
" \"arguments\": {\n",
" \"booking_id\": \"YGA123\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"cancel_booking\",\n",
" \"content\": {\n",
" \"data\": {\n",
" \"class_name\": \"Yoga for Relaxation\",\n",
" \"class_time\": \"2039-03-10 17:00\",\n",
" \"instructor_name\": \"Maggie Zhu\",\n",
" \"cancellation_time\": \"2039-03-09 19:10\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"10:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Yoga for Relaxation\",\n",
" \"instructor\": \"Maggie Zhu\",\n",
" \"time\": \"2039-03-10 10:00\",\n",
" \"duration\": 60\n",
" },\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 10:30\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"08:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 08:00\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" }\n",
"]\n",
"verify_tokenizer(messages_2, model_list, tools, BASE_MODELS_DIR, fc_modes = [True], file_suffix = \"example_2\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 2 (Function Call Where Tool_Calls is not a List)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"messages_2 = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": {\n",
" \"name\": \"cancel_booking\",\n",
" \"arguments\": {\n",
" \"booking_id\": \"YGA123\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"cancel_booking\",\n",
" \"content\": {\n",
" \"data\": {\n",
" \"class_name\": \"Yoga for Relaxation\",\n",
" \"class_time\": \"2039-03-10 17:00\",\n",
" \"instructor_name\": \"Maggie Zhu\",\n",
" \"cancellation_time\": \"2039-03-09 19:10\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"10:00\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Yoga for Relaxation\",\n",
" \"instructor\": \"Maggie Zhu\",\n",
" \"time\": \"2039-03-10 10:00\",\n",
" \"duration\": 60\n",
" },\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 10:30\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"08:00\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 08:00\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" }\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"ename": "TemplateError",
"evalue": "The tool_calls must be a list!",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTemplateError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[77], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mtool_calls is similar to OpenAI requirements, and it should be a list of API call(s), not a dictionary \u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[43mverify_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages_2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_list\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBASE_MODELS_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfc_modes\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_suffix\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample_2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[72], line 13\u001b[0m, in \u001b[0;36mverify_tokenizer\u001b[0;34m(messages, model_list, tools, BASE_MODELS_DIR, fc_modes, file_suffix)\u001b[0m\n\u001b[1;32m 10\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(checkpoint)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fc_mode:\n\u001b[0;32m---> 13\u001b[0m model_input \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_chat_template\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43mtokenize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 19\u001b[0m model_input \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mapply_chat_template(\n\u001b[1;32m 20\u001b[0m messages,\n\u001b[1;32m 21\u001b[0m tokenize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 22\u001b[0m )\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1869\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.apply_chat_template\u001b[0;34m(self, conversation, tools, documents, chat_template, add_generation_prompt, continue_final_message, tokenize, padding, truncation, max_length, return_tensors, return_dict, return_assistant_tokens_mask, tokenizer_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 1867\u001b[0m all_generation_indices\u001b[38;5;241m.\u001b[39mappend(generation_indices)\n\u001b[1;32m 1868\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1869\u001b[0m rendered_chat \u001b[38;5;241m=\u001b[39m \u001b[43mcompiled_template\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrender\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1870\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1871\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtool_schemas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1872\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdocuments\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1873\u001b[0m \u001b[43m \u001b[49m\u001b[43madd_generation_prompt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_generation_prompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1874\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtemplate_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1875\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1876\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m continue_final_message:\n\u001b[1;32m 1877\u001b[0m final_message \u001b[38;5;241m=\u001b[39m chat[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/environment.py:1304\u001b[0m, in \u001b[0;36mTemplate.render\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment\u001b[38;5;241m.\u001b[39mconcat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mroot_render_func(ctx)) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[0;32m-> 1304\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menvironment\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/environment.py:939\u001b[0m, in \u001b[0;36mEnvironment.handle_exception\u001b[0;34m(self, source)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Exception handling helper. This is used internally to either raise\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03mrewritten exceptions or return a rendered traceback for the template.\u001b[39;00m\n\u001b[1;32m 936\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 937\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdebug\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rewrite_traceback_stack\n\u001b[0;32m--> 939\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m rewrite_traceback_stack(source\u001b[38;5;241m=\u001b[39msource)\n",
"File \u001b[0;32m:87\u001b[0m, in \u001b[0;36mtop-level template code\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/sandbox.py:394\u001b[0m, in \u001b[0;36mSandboxedEnvironment.call\u001b[0;34m(_SandboxedEnvironment__self, _SandboxedEnvironment__context, _SandboxedEnvironment__obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m __self\u001b[38;5;241m.\u001b[39mis_safe_callable(__obj):\n\u001b[1;32m 393\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SecurityError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m__obj\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m is not safely callable\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 394\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m__context\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m__obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/transformers/utils/chat_template_utils.py:412\u001b[0m, in \u001b[0;36m_compile_jinja_template..raise_exception\u001b[0;34m(message)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_exception\u001b[39m(message):\n\u001b[0;32m--> 412\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m jinja2\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mTemplateError(message)\n",
"\u001b[0;31mTemplateError\u001b[0m: The tool_calls must be a list!"
]
}
],
"source": [
"\"\"\"\n",
"tool_calls is similar to OpenAI requirements, and it should be a list of API call(s), not a dictionary \n",
"\"\"\"\n",
"verify_tokenizer(messages_2, model_list, tools, BASE_MODELS_DIR, fc_modes = [True], file_suffix = \"example_2\")"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"ename": "TemplateError",
"evalue": "Conversation roles must alternate user/assistant/user/assistant/...",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTemplateError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[85], line 6\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mIf you're using the tool_call format, you must adhere to the specific requirements of the tool_call chat template\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03minstead of using the general chat template.\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;03mTemplateError: Conversation roles must alternate user/assistant/user/assistant/...\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[43mverify_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages_2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_list\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBASE_MODELS_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfc_modes\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_suffix\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample_2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[78], line 19\u001b[0m, in \u001b[0;36mverify_tokenizer\u001b[0;34m(messages, model_list, tools, BASE_MODELS_DIR, fc_modes, file_suffix)\u001b[0m\n\u001b[1;32m 13\u001b[0m model_input \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mapply_chat_template(\n\u001b[1;32m 14\u001b[0m messages,\n\u001b[1;32m 15\u001b[0m tools\u001b[38;5;241m=\u001b[39mtools,\n\u001b[1;32m 16\u001b[0m tokenize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 17\u001b[0m )\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 19\u001b[0m model_input \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_chat_template\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mtokenize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxlam_8x22b_r\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m checkpoint:\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m100\u001b[39m)\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1869\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.apply_chat_template\u001b[0;34m(self, conversation, tools, documents, chat_template, add_generation_prompt, continue_final_message, tokenize, padding, truncation, max_length, return_tensors, return_dict, return_assistant_tokens_mask, tokenizer_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 1867\u001b[0m all_generation_indices\u001b[38;5;241m.\u001b[39mappend(generation_indices)\n\u001b[1;32m 1868\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1869\u001b[0m rendered_chat \u001b[38;5;241m=\u001b[39m \u001b[43mcompiled_template\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrender\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1870\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1871\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtool_schemas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1872\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdocuments\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1873\u001b[0m \u001b[43m \u001b[49m\u001b[43madd_generation_prompt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_generation_prompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1874\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtemplate_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1875\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1876\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m continue_final_message:\n\u001b[1;32m 1877\u001b[0m final_message \u001b[38;5;241m=\u001b[39m chat[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/environment.py:1304\u001b[0m, in \u001b[0;36mTemplate.render\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment\u001b[38;5;241m.\u001b[39mconcat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mroot_render_func(ctx)) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[0;32m-> 1304\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menvironment\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/environment.py:939\u001b[0m, in \u001b[0;36mEnvironment.handle_exception\u001b[0;34m(self, source)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Exception handling helper. This is used internally to either raise\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03mrewritten exceptions or return a rendered traceback for the template.\u001b[39;00m\n\u001b[1;32m 936\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 937\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdebug\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rewrite_traceback_stack\n\u001b[0;32m--> 939\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m rewrite_traceback_stack(source\u001b[38;5;241m=\u001b[39msource)\n",
"File \u001b[0;32m:1\u001b[0m, in \u001b[0;36mtop-level template code\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/sandbox.py:394\u001b[0m, in \u001b[0;36mSandboxedEnvironment.call\u001b[0;34m(_SandboxedEnvironment__self, _SandboxedEnvironment__context, _SandboxedEnvironment__obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m __self\u001b[38;5;241m.\u001b[39mis_safe_callable(__obj):\n\u001b[1;32m 393\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SecurityError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m__obj\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m is not safely callable\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 394\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m__context\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m__obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/transformers/utils/chat_template_utils.py:412\u001b[0m, in \u001b[0;36m_compile_jinja_template..raise_exception\u001b[0;34m(message)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_exception\u001b[39m(message):\n\u001b[0;32m--> 412\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m jinja2\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mTemplateError(message)\n",
"\u001b[0;31mTemplateError\u001b[0m: Conversation roles must alternate user/assistant/user/assistant/..."
]
}
],
"source": [
"\"\"\"\n",
"If you're using the tool_call format, you must adhere to the specific requirements of the tool_call chat template\n",
"instead of using the general chat template.\n",
"TemplateError: Conversation roles must alternate user/assistant/user/assistant/...\n",
"\"\"\"\n",
"verify_tokenizer(messages_2, model_list, tools, BASE_MODELS_DIR, fc_modes = [False], file_suffix = \"example_2\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 3 (add a user turn after user/assistant/tool)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"messages_3 = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"cancel_booking\",\n",
" \"arguments\": {\n",
" \"booking_id\": \"YGA123\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"cancel_booking\",\n",
" \"content\": {\n",
" \"data\": {\n",
" \"class_name\": \"Yoga for Relaxation\",\n",
" \"class_time\": \"2039-03-10 17:00\",\n",
" \"instructor_name\": \"Maggie Zhu\",\n",
" \"cancellation_time\": \"2039-03-09 19:10\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"10:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Yoga for Relaxation\",\n",
" \"instructor\": \"Maggie Zhu\",\n",
" \"time\": \"2039-03-10 10:00\",\n",
" \"duration\": 60\n",
" },\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 10:30\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"08:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 08:00\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Thanks very much for your help!\"\n",
" },\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"ename": "TemplateError",
"evalue": "After the optional system message, conversation roles can only be from user/assistant/tool; After each tool message, the next message must be from the assistant",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTemplateError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[79], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mAfter the optional system message, conversation roles can only be from user/assistant/tool; \u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03mAfter each tool message, the next message must be from the assistant\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mverify_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages_3\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_list\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBASE_MODELS_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfc_modes\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_suffix\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample_3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"Cell \u001b[0;32mIn[72], line 13\u001b[0m, in \u001b[0;36mverify_tokenizer\u001b[0;34m(messages, model_list, tools, BASE_MODELS_DIR, fc_modes, file_suffix)\u001b[0m\n\u001b[1;32m 10\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(checkpoint)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fc_mode:\n\u001b[0;32m---> 13\u001b[0m model_input \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_chat_template\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43mtokenize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 19\u001b[0m model_input \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mapply_chat_template(\n\u001b[1;32m 20\u001b[0m messages,\n\u001b[1;32m 21\u001b[0m tokenize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 22\u001b[0m )\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1869\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.apply_chat_template\u001b[0;34m(self, conversation, tools, documents, chat_template, add_generation_prompt, continue_final_message, tokenize, padding, truncation, max_length, return_tensors, return_dict, return_assistant_tokens_mask, tokenizer_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 1867\u001b[0m all_generation_indices\u001b[38;5;241m.\u001b[39mappend(generation_indices)\n\u001b[1;32m 1868\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1869\u001b[0m rendered_chat \u001b[38;5;241m=\u001b[39m \u001b[43mcompiled_template\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrender\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1870\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1871\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtool_schemas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1872\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdocuments\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1873\u001b[0m \u001b[43m \u001b[49m\u001b[43madd_generation_prompt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_generation_prompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1874\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtemplate_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1875\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1876\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m continue_final_message:\n\u001b[1;32m 1877\u001b[0m final_message \u001b[38;5;241m=\u001b[39m chat[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/environment.py:1304\u001b[0m, in \u001b[0;36mTemplate.render\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment\u001b[38;5;241m.\u001b[39mconcat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mroot_render_func(ctx)) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[0;32m-> 1304\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menvironment\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/environment.py:939\u001b[0m, in \u001b[0;36mEnvironment.handle_exception\u001b[0;34m(self, source)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Exception handling helper. This is used internally to either raise\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03mrewritten exceptions or return a rendered traceback for the template.\u001b[39;00m\n\u001b[1;32m 936\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 937\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdebug\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rewrite_traceback_stack\n\u001b[0;32m--> 939\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m rewrite_traceback_stack(source\u001b[38;5;241m=\u001b[39msource)\n",
"File \u001b[0;32m:21\u001b[0m, in \u001b[0;36mtop-level template code\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/jinja2/sandbox.py:394\u001b[0m, in \u001b[0;36mSandboxedEnvironment.call\u001b[0;34m(_SandboxedEnvironment__self, _SandboxedEnvironment__context, _SandboxedEnvironment__obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m __self\u001b[38;5;241m.\u001b[39mis_safe_callable(__obj):\n\u001b[1;32m 393\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SecurityError(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m__obj\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m is not safely callable\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 394\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m__context\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcall\u001b[49m\u001b[43m(\u001b[49m\u001b[43m__obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/export/agentstudio-family/miniconda3/envs/vllm_inference/lib/python3.11/site-packages/transformers/utils/chat_template_utils.py:412\u001b[0m, in \u001b[0;36m_compile_jinja_template..raise_exception\u001b[0;34m(message)\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_exception\u001b[39m(message):\n\u001b[0;32m--> 412\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m jinja2\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mTemplateError(message)\n",
"\u001b[0;31mTemplateError\u001b[0m: After the optional system message, conversation roles can only be from user/assistant/tool; After each tool message, the next message must be from the assistant"
]
}
],
"source": [
"\"\"\"\n",
"After the optional system message, conversation roles can only be from user/assistant/tool; \n",
"After each tool message, the next message must be from the assistant\n",
"\"\"\"\n",
"verify_tokenizer(messages_3, model_list, tools, BASE_MODELS_DIR, fc_modes = [True], file_suffix = \"example_3\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 4 (add an assistant/user turn after user/assistant/tool)"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: True\n",
"----------------------------------------------------------------------------------------------------\n",
" [INST]\n",
"[BEGIN OF TASK INSTRUCTION]\n",
"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\n",
"[END OF TASK INSTRUCTION]\n",
"\n",
"[BEGIN OF AVAILABLE_TOOLS]\n",
"[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n",
"[END OF AVAILABLE_TOOLS]\n",
"\n",
"[BEGIN OF FORMAT INSTRUCTION]\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
"[END OF FORMAT INSTRUCTION]\n",
"\n",
"[BEGIN OF QUERY]\n",
"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\n",
"[END OF QUERY]\n",
"\n",
"[BEGIN OF HISTORY STEPS]\n",
"[{'thought':\"\", 'tool_calls':[{\"name\": \"cancel_booking\", \"arguments\": {\"booking_id\": \"YGA123\"}}], 'step_id':1, 'next_observation':{\"data\": {\"class_name\": \"Yoga for Relaxation\", \"class_time\": \"2039-03-10 17:00\", \"instructor_name\": \"Maggie Zhu\", \"cancellation_time\": \"2039-03-09 19:10\"}}}, {'thought':\"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"10:00\"}}], 'step_id':3, 'next_observation':{\"data\": [{\"name\": \"Yoga for Relaxation\", \"instructor\": \"Maggie Zhu\", \"time\": \"2039-03-10 10:00\", \"duration\": 60}, {\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 10:30\", \"duration\": 30}]}}, {'thought':\"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\", 'tool_calls':[], 'step_id':4, 'next_observation':'', 'user_input':\"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"08:00\"}}], 'step_id':5, 'next_observation':{\"data\": [{\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 08:00\", \"duration\": 30}]}}, {'thought':\"I have booked a meditation class for you!\", 'tool_calls':[], 'step_id':6, 'next_observation':'', 'user_input':\"Thanks very much for your help!\"} ]\n",
"[END OF HISTORY STEPS]\n",
"\n",
" [/INST]\n"
]
}
],
"source": [
"messages_4 = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"cancel_booking\",\n",
" \"arguments\": {\n",
" \"booking_id\": \"YGA123\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"cancel_booking\",\n",
" \"content\": {\n",
" \"data\": {\n",
" \"class_name\": \"Yoga for Relaxation\",\n",
" \"class_time\": \"2039-03-10 17:00\",\n",
" \"instructor_name\": \"Maggie Zhu\",\n",
" \"cancellation_time\": \"2039-03-09 19:10\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"10:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Yoga for Relaxation\",\n",
" \"instructor\": \"Maggie Zhu\",\n",
" \"time\": \"2039-03-10 10:00\",\n",
" \"duration\": 60\n",
" },\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 10:30\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"08:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 08:00\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I have booked a meditation class for you!\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Thanks very much for your help!\"\n",
" },\n",
"]\n",
"verify_tokenizer(messages_4, model_list, tools, BASE_MODELS_DIR, fc_modes = [True], file_suffix = \"example_4\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 5 (add Thought to \"content\")"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: True\n",
"----------------------------------------------------------------------------------------------------\n",
" [INST]\n",
"[BEGIN OF TASK INSTRUCTION]\n",
"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\n",
"[END OF TASK INSTRUCTION]\n",
"\n",
"[BEGIN OF AVAILABLE_TOOLS]\n",
"[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n",
"[END OF AVAILABLE_TOOLS]\n",
"\n",
"[BEGIN OF FORMAT INSTRUCTION]\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
"[END OF FORMAT INSTRUCTION]\n",
"\n",
"[BEGIN OF QUERY]\n",
"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\n",
"[END OF QUERY]\n",
"\n",
"[BEGIN OF HISTORY STEPS]\n",
"[{'thought':\"Sure! I will help you cancel the booking. Let me check the booking details first.\", 'tool_calls':[{\"name\": \"cancel_booking\", \"arguments\": {\"booking_id\": \"YGA123\"}}], 'step_id':1, 'next_observation':{\"data\": {\"class_name\": \"Yoga for Relaxation\", \"class_time\": \"2039-03-10 17:00\", \"instructor_name\": \"Maggie Zhu\", \"cancellation_time\": \"2039-03-09 19:10\"}}}, {'thought':\"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"10:00\"}}], 'step_id':3, 'next_observation':{\"data\": [{\"name\": \"Yoga for Relaxation\", \"instructor\": \"Maggie Zhu\", \"time\": \"2039-03-10 10:00\", \"duration\": 60}, {\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 10:30\", \"duration\": 30}]}}, {'thought':\"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\", 'tool_calls':[], 'step_id':4, 'next_observation':'', 'user_input':\"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"08:00\"}}], 'step_id':5, 'next_observation':{\"data\": [{\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 08:00\", \"duration\": 30}]}}, {'thought':\"I have booked a meditation class for you!\", 'tool_calls':[], 'step_id':6, 'next_observation':'', 'user_input':\"Thanks very much for your help!\"} ]\n",
"[END OF HISTORY STEPS]\n",
"\n",
" [/INST]\n"
]
}
],
"source": [
"messages_5 = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Sure! I will help you cancel the booking. Let me check the booking details first.\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"cancel_booking\",\n",
" \"arguments\": {\n",
" \"booking_id\": \"YGA123\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"cancel_booking\",\n",
" \"content\": {\n",
" \"data\": {\n",
" \"class_name\": \"Yoga for Relaxation\",\n",
" \"class_time\": \"2039-03-10 17:00\",\n",
" \"instructor_name\": \"Maggie Zhu\",\n",
" \"cancellation_time\": \"2039-03-09 19:10\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"10:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Yoga for Relaxation\",\n",
" \"instructor\": \"Maggie Zhu\",\n",
" \"time\": \"2039-03-10 10:00\",\n",
" \"duration\": 60\n",
" },\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 10:30\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"get_classes\",\n",
" \"arguments\": {\n",
" \"date\": \"2039-03-10\",\n",
" \"time\": \"08:00\"\n",
" }\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"role\": \"tool\",\n",
" \"name\": \"get_classes\",\n",
" \"content\": {\n",
" \"data\": [\n",
" {\n",
" \"name\": \"Meditation for Beginners\",\n",
" \"instructor\": \"John Lee\",\n",
" \"time\": \"2039-03-10 08:00\",\n",
" \"duration\": 30\n",
" }\n",
" ]\n",
" }\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"I have booked a meditation class for you!\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Thanks very much for your help!\"\n",
" },\n",
"]\n",
"verify_tokenizer(messages_5, model_list, tools, BASE_MODELS_DIR, fc_modes = [True], file_suffix = \"example_5\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 6 (Parallem Function Call)"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: True\n",
"----------------------------------------------------------------------------------------------------\n",
" [INST]\n",
"[BEGIN OF TASK INSTRUCTION]\n",
"You are an expert in composing functions. You are given a question and a set of possible functions. \n",
"Based on the question, you will need to make one or more function/tool calls to achieve the purpose. \n",
"If none of the function can be used, point it out and refuse to answer. \n",
"If the given question lacks the parameters required by the function, also point it out.\n",
"[END OF TASK INSTRUCTION]\n",
"\n",
"[BEGIN OF AVAILABLE_TOOLS]\n",
"[{'name': 'sticker_trending', 'description': 'Fetches the latest trending stickers from Giphy using the specified parameters.', 'parameters': {'s': {'description': 'The term or phrase to translate into a sticker.', 'type': 'str', 'default': 'hungry'}, 'limit': {'description': 'The number of results to return, with a maximum of 100. Defaults to 25.', 'type': 'str, optional', 'default': ''}, 'offset': {'description': 'The results offset, defaults to 0.', 'type': 'str, optional', 'default': ''}, 'fmt': {'description': \"The format in which to return results, either 'html' or 'json'.\", 'type': 'str, optional', 'default': ''}, 'rating': {'description': \"The content rating to limit results to, one of 'y', 'g', 'pg', 'pg-13', or 'r'.\", 'type': 'str, optional', 'default': ''}}}, {'name': 'reels_downloader', 'description': 'Downloads Instagram reels or posts using a given link and RapidAPI key.', 'parameters': {'link': {'description': 'The URL of the Instagram media (reel or post) to be downloaded.', 'type': 'str', 'default': 'https://www.instagram.com/reels/CsinDvAgoYA/'}}}]\n",
"[END OF AVAILABLE_TOOLS]\n",
"\n",
"[BEGIN OF FORMAT INSTRUCTION]\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
"[END OF FORMAT INSTRUCTION]\n",
"\n",
"[BEGIN OF QUERY]\n",
"Get trending stickers for the term 'cat' in HTML format. Also, find trending stickers for 'dog' with a G rating and limit of 20.\n",
"[END OF QUERY]\n",
"\n",
"[BEGIN OF HISTORY STEPS]\n",
"[{'thought':\"Two requests are formulated: one for 'cat' stickers with a specific format, and another for 'dog' stickers with rating and limit.\", 'tool_calls':[{\"name\": \"sticker_trending\", \"arguments\": {\"s\": \"cat\", \"fmt\": \"html\"}}, {\"name\": \"sticker_trending\", \"arguments\": {\"s\": \"dog\", \"rating\": \"g\", \"limit\": \"20\"}}], 'step_id':1, 'next_observation':''}]\n",
"[END OF HISTORY STEPS]\n",
"\n",
" [/INST]\n"
]
}
],
"source": [
"messages_6 = [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are an expert in composing functions. You are given a question and a set of possible functions. \\nBased on the question, you will need to make one or more function/tool calls to achieve the purpose. \\nIf none of the function can be used, point it out and refuse to answer. \\nIf the given question lacks the parameters required by the function, also point it out.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Get trending stickers for the term 'cat' in HTML format. Also, find trending stickers for 'dog' with a G rating and limit of 20.\"\n",
" },\n",
" {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Two requests are formulated: one for 'cat' stickers with a specific format, and another for 'dog' stickers with rating and limit.\",\n",
" \"tool_calls\": [\n",
" {\n",
" \"name\": \"sticker_trending\",\n",
" \"arguments\": {\n",
" \"s\": \"cat\",\n",
" \"fmt\": \"html\"\n",
" }\n",
" },\n",
" {\n",
" \"name\": \"sticker_trending\",\n",
" \"arguments\": {\n",
" \"s\": \"dog\",\n",
" \"rating\": \"g\",\n",
" \"limit\": \"20\"\n",
" }\n",
" }\n",
" ]\n",
" }\n",
"]\n",
"\n",
"tools_6 = [\n",
" {\n",
" \"name\": \"sticker_trending\",\n",
" \"description\": \"Fetches the latest trending stickers from Giphy using the specified parameters.\",\n",
" \"parameters\": {\n",
" \"s\": {\n",
" \"description\": \"The term or phrase to translate into a sticker.\",\n",
" \"type\": \"str\",\n",
" \"default\": \"hungry\"\n",
" },\n",
" \"limit\": {\n",
" \"description\": \"The number of results to return, with a maximum of 100. Defaults to 25.\",\n",
" \"type\": \"str, optional\",\n",
" \"default\": \"\"\n",
" },\n",
" \"offset\": {\n",
" \"description\": \"The results offset, defaults to 0.\",\n",
" \"type\": \"str, optional\",\n",
" \"default\": \"\"\n",
" },\n",
" \"fmt\": {\n",
" \"description\": \"The format in which to return results, either 'html' or 'json'.\",\n",
" \"type\": \"str, optional\",\n",
" \"default\": \"\"\n",
" },\n",
" \"rating\": {\n",
" \"description\": \"The content rating to limit results to, one of 'y', 'g', 'pg', 'pg-13', or 'r'.\",\n",
" \"type\": \"str, optional\",\n",
" \"default\": \"\"\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"name\": \"reels_downloader\",\n",
" \"description\": \"Downloads Instagram reels or posts using a given link and RapidAPI key.\",\n",
" \"parameters\": {\n",
" \"link\": {\n",
" \"description\": \"The URL of the Instagram media (reel or post) to be downloaded.\",\n",
" \"type\": \"str\",\n",
" \"default\": \"https://www.instagram.com/reels/CsinDvAgoYA/\"\n",
" }\n",
" }\n",
" }\n",
"]\n",
"verify_tokenizer(messages_6, model_list, tools_6, BASE_MODELS_DIR, fc_modes = [True], file_suffix = \"example_6\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 7 (General Chat with a System Message at the Beginning)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: True\n",
"----------------------------------------------------------------------------------------------------\n",
" [INST]\n",
"[BEGIN OF TASK INSTRUCTION]\n",
"You are a help AI assistant developed by the xLAM team in Salesforce AI Research.\n",
"[END OF TASK INSTRUCTION]\n",
"\n",
"[BEGIN OF AVAILABLE_TOOLS]\n",
"[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n",
"[END OF AVAILABLE_TOOLS]\n",
"\n",
"[BEGIN OF FORMAT INSTRUCTION]\n",
"Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n",
"```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n",
"[END OF FORMAT INSTRUCTION]\n",
"\n",
"[BEGIN OF QUERY]\n",
"My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?\n",
"[END OF QUERY]\n",
"\n",
"[BEGIN OF HISTORY STEPS]\n",
"[{'thought':\"Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?\", 'tool_calls':[], 'step_id':1, 'next_observation':'', 'user_input':\"Yes, we'd love that. We prefer sunny days with warm temperatures.\"}, {'thought':\"Got it. Let me check the weather forecast for Los Angeles to help you find the best date.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"Sounds great! Thanks for your help.\"} ]\n",
"[END OF HISTORY STEPS]\n",
"\n",
" [/INST]\n",
"****************************************************************************************************\n",
"Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n",
"----------------------------------------------------------------------------------------------------\n",
"FC Mode: False\n",
"----------------------------------------------------------------------------------------------------\n",
"[INST] You are a help AI assistant developed by the xLAM team in Salesforce AI Research.\n",
"\n",
"My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip? [/INST] Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?[INST] Yes, we'd love that. We prefer sunny days with warm temperatures. [/INST] Got it. Let me check the weather forecast for Los Angeles to help you find the best date.[INST] Sounds great! Thanks for your help. [/INST]\n"
]
}
],
"source": [
"messages_7 = [\n",
" {\"role\": \"system\", \"content\": \"You are a help AI assistant developed by the xLAM team in Salesforce AI Research.\"},\n",
" \n",
" {\"role\": \"user\", \"content\": \"My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?\"},\n",
" \n",
" {\"role\": \"assistant\", \"content\": \"Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?\"},\n",
" \n",
" {\"role\": \"user\", \"content\": \"Yes, we'd love that. We prefer sunny days with warm temperatures.\"},\n",
" \n",
" {\"role\": \"assistant\", \"content\": \"Got it. Let me check the weather forecast for Los Angeles to help you find the best date.\"},\n",
" \n",
" {\"role\": \"user\", \"content\": \"Sounds great! Thanks for your help.\"},\n",
"]\n",
"\n",
"model_list_2 = [\n",
" # \"xlam_7b_r\", \n",
" \"xlam_8x7b_r\",\n",
" \"xlam_8x22b_r\"\n",
"]\n",
"\"\"\"\n",
"The mistral-v0.2 old tokenizer does not support system message. \n",
"We need to update it to align with their latest changes. \n",
"\"\"\"\n",
"verify_tokenizer(messages_7, model_list_2, tools, BASE_MODELS_DIR, fc_modes = [True, False], file_suffix = \"example_7\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}