Thanks, and a question about function calling.

#17
by NickyNicky - opened

Thanks for the model,

I would like to know what the prompt template looks like for function calling with this model.

image.png

# Minimal two-turn conversation used to inspect how the chat template renders.
messages = [
    {"role": "user", "content": "hello user"},
    {"role": "assistant", "content": "hello assistant"},
]
# Render to plain text (no token ids) so the template markers are visible.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# output:
# <s>[INST] hello user [/INST]hello assistant</s>

function calling?

image.png

thank you very much.

I don't know whether this implementation is correct:

# Function to perform inference with cloned models
def generate(prompt, model, tokenizer, temperature=0.33, max_new_tokens=10):
    """Sample a completion for *prompt* with a ``convert_currency`` tool advertised.

    The prompt is wrapped in a hand-written template that lists the tool
    inside ``[AVAILABLE_TOOLS]`` and shows an example call inside
    ``[TOOL_CALLS]`` before the ``[INST]`` block.

    Args:
        prompt: The user message placed inside ``[INST] ... [/INST]``.
        model: Object exposing ``generate(...)`` and ``device``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``eos_token_id``.
        temperature: Sampling temperature.
        max_new_tokens: Maximum number of tokens to generate *after* the
            prompt.

    Returns:
        The decoded newly generated text only (special tokens kept), without
        the prompt.
    """
    tools_schema = """[
    {
        "name": "convert_currency",
        "description": "Convert amount from one currency to another",
        "parameters": {
            "type": "object",
            "properties": {
                "amount": {
                    "type": "number",
                    "description": "The amount to convert"
                },
                "from_currency": {
                    "type": "string",
                    "description": "The currency to convert from"
                },
                "to_currency": {
                    "type": "string",
                    "description": "The currency to convert to"
                }
            },
            "required": [
                "amount",
                "from_currency",
                "to_currency"
            ]
        }
    }
]"""

    call_example = '[{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": "value_2", ...}}]'
    # NOTE(review): the [TOOL_CALLS] example block before [INST] is a custom
    # addition of this template -- confirm it matches the model's expected
    # function-calling format.
    text = f"""<s>[AVAILABLE_TOOLS]{tools_schema} [/AVAILABLE_TOOLS][TOOL_CALLS]{call_example} [TOOL_CALLS][INST]{prompt} [/INST]"""
    # <s> is already written in the template, so do not let the tokenizer
    # add special tokens a second time.
    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(model.device)
    prompt_length = len(encoded["input_ids"][0])

    generation_config = GenerationConfig(
        # max_new_tokens already excludes the prompt; adding the prompt
        # length on top (as before) silently inflated the budget.
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_k=50,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    with torch.no_grad():
        outputs = model.generate(
            **encoded,
            generation_config=generation_config,
            stopping_criteria=stopping_criteria_list,
        )
        print("len:", len(outputs[0]))
        # Slice off the prompt tokens instead of string-replacing the prompt:
        # the decoded prompt is not byte-identical to `text` (spacing around
        # special tokens differs), so `.replace(text, "")` removed nothing.
        generated_ids = outputs[0][prompt_length:]
        result = tokenizer.decode(generated_ids, skip_special_tokens=False)
        liberaMemoria()
    return result

# Example call: a currency-conversion request with a generous token budget.
gen = generate(
    "Hi, I need to convert 500 US dollars to Euros. Can you help me with that?",
    model,
    tokenizer,
    temperature=0.44,
    max_new_tokens=2000,
)
print(gen)

output:

len: 329
<s>[AVAILABLE_TOOLS] [
    {
        "name": "convert_currency",
        "description": "Convert amount from one currency to another",
        "parameters": {
            "type": "object",
            "properties": {
                "amount": {
                    "type": "number",
                    "description": "The amount to convert"
                },
                "from_currency": {
                    "type": "string",
                    "description": "The currency to convert from"
                },
                "to_currency": {
                    "type": "string",
                    "description": "The currency to convert to"
                }
            },
            "required": [
                "amount",
                "from_currency",
                "to_currency"
            ]
        }
    }
] [/AVAILABLE_TOOLS][TOOL_CALLS] [{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": "value_2", ...}}] [TOOL_CALLS][INST] Hi, I need to convert 500 US dollars to Euros. Can you help me with that? [/INST][TOOL_CALLS] [{"name": "convert_currency", "arguments": {"amount": 500, "from_currency": "USD", "to_currency": "EUR"}}] [TOOL_CALLS] According to the current exchange rate, 500 US dollars is approximately 440.38 Euros.</s>

Is this a good function-calling template or not?

Using the mistral_inference library to leverage mistral's own template, I ran the following:

from mistral_common.protocol.instruct.tool_calls import Function, Tool
from mistral_inference.model import Transformer
from mistral_inference.generate import generate

from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest


# Load the v3 tokenizer file and the model from the local model folder.
tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
model = Transformer.from_folder(mistral_models_path)

# JSON-schema description of the arguments accepted by the weather tool.
weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
        },
        "format": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "The temperature unit to use. Infer this from the users location.",
        },
    },
    "required": ["location", "format"],
}

# The single tool offered to the model for this request.
weather_tool = Tool(
    function=Function(
        name="get_current_weather",
        description="Get the current weather",
        parameters=weather_parameters,
    )
)

completion_request = ChatCompletionRequest(
    tools=[weather_tool],
    messages=[UserMessage(content="What's the weather like in Melbourne?")],
)

# Encode the request; keep both the token ids and the rendered prompt text.
tokenized_result = tokenizer.encode_chat_completion(completion_request)
tokens = tokenized_result.tokens
prompt_text = tokenized_result.text

print(prompt_text)

The output of prompt_text is:

<s><s>[AVAILABLE_TOOLS]▁[{"type":▁"function",▁"function":▁{"name":▁"get_current_weather",▁"description":▁"Get▁the▁current▁weather",▁"parameters":▁{"type":▁"object",▁"properties":▁{"location":▁{"type":▁"string",▁"description":▁"The▁city▁and▁state,▁e.g.▁San▁Francisco,▁CA"},▁"format":▁{"type":▁"string",▁"enum":▁["celsius",▁"fahrenheit"],▁"description":▁"The▁temperature▁unit▁to▁use.▁Infer▁this▁from▁the▁users▁location."}},▁"required":▁["location",▁"format"]}}}][/AVAILABLE_TOOLS][INST]▁What's▁the▁weather▁like▁in▁Melbourne?[/INST]

Note that the funny underscores can be replaced with spaces in your implementation. I think they must be mapped to the same token in the tokenizer possibly.

The model's output is:

[{"name": "get_current_weather", "arguments": {"location": "Melbourne, AU", "format": "celsius"}}]

Note it is incorrectly attempting to answer the question after calling the function. I wonder if there is an easy way to stop generation after a function call, so we can execute the function and add the result back to the context. I'm also keen to know the recommended format for adding function results. I'd imagine the user role is not ideal, because then the assistant would assume the user already has access to this information. Possibly we just append it to the assistant's context?

Hoped the other tools tokens would be used to delimit, but I can't see them in the response.

Thank you very much for the response,
I ran the code you provided, but I was left with doubts about the model's response. I also wrote a separate implementation that does not use the provided code, but I am still unsure whether the function result should be passed back as the model, as the user, or under some special token.

Per @lcahill post, I tested using:

[AVAILABLE_TOOLS] [{"type": "function", "function": {"name": "get_current_weather", "description": "Get the current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use. Infer this from the users location."}}, "required": ["location", "format"]}}}][/AVAILABLE_TOOLS] [INST] What's the weather like in Melbourne?[/INST]

Response received:

[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Melbourne, AU", "format": "celsius"}}]

I tested this, but I am not sure whether it is completely correct:

[TOOL_CALLS] [{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": "value_2", ...}}] [TOOL_CALLS][INST] Hi, I need to convert 500 US dollars to Euros. Can you help me with that? [/INST][TOOL_CALLS] [{"name": "convert_currency", "arguments": {"amount": 500, "from_currency": "USD", "to_currency": "EUR"}}] [TOOL_CALLS] According to the current exchange rate, 500 US dollars is approximately 440.38 Euros.</s>

Is this [TOOL_CALLS] block before [INST] correct? (I have to add it because otherwise the model doesn't respond correctly every time.)

[TOOL_CALLS] [{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": "value_2", ...}}] [TOOL_CALLS][INST]

resp.

[TOOL_CALLS] [{"name": "convert_currency", "arguments": {"amount": 500, "from_currency": "USD", "to_currency": "EUR"}}] [TOOL_CALLS] According to the current exchange rate, 500 US dollars is approximately 440.38 Euros.</s>

Is this sentence after [TOOL_CALLS] correct?

According to the current exchange rate, 500 US dollars is approximately 440.38 Euros.</s>

The mistral library adds the available tools right before the last user message

[AVAILABLE_TOOLS] [{"name": "get_weather", "parameters": { }}] [/AVAILABLE_TOOLS][INST] Your instructions [/INST] The response</s>

But the problem I'm seeing is that Mistral has trouble calling multiple functions in sequence. After calling a function, it won't output [TOOL_CALLS] to call another function.

I wonder if an adaptation of the (admittedly ugly) implementation in my comment on this PR (which allows available tools, tool calls and tool results to be added using the chat_template) works for the conversation example you are attempting?

https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/discussions/21

It essentially interrupts the generation after the first tool is called, computes the result, adds both to the context, and then allows generation to continue (which could involve using another tool).

I'm not sure if I understand you, but I tried:

# Same weather-tool request as above in the thread, asked about Paris.
completion_request = ChatCompletionRequest(
    tools=[
        Tool(
            function=Function(
                name="get_current_weather",
                description="Get the current weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "format": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the users location.",
                        },
                    },
                    "required": ["location", "format"],
                },
            )
        )
    ],
    messages=[
        UserMessage(content="What's the weather like today in Paris?"),
    ],
)
# Print the prompt text exactly as the tokenizer renders it.
encoded_request = tokenizer.encode_chat_completion(completion_request)
print(encoded_request.text)

and this is the output:

<s>[AVAILABLE_TOOLS]▁[{"type":▁"function",▁"function":▁{"name":▁"get_current_weather",▁"description":▁"Get▁the▁current▁weather",▁"parameters":▁{"type":▁"object",▁"properties":▁{"location":▁{"type":▁"string",▁"description":▁"The▁city▁and▁state,▁e.g.▁San▁Francisco,▁CA"},▁"format":▁{"type":▁"string",▁"enum":▁["celsius",▁"fahrenheit"],▁"description":▁"The▁temperature▁unit▁to▁use.▁Infer▁this▁from▁the▁users▁location."}},▁"required":▁["location",▁"format"]}}}][/AVAILABLE_TOOLS][INST]▁What's▁the▁weather▁like▁today▁in▁Paris?[/INST]

I hope this helps you.

This comment has been hidden

I would like to use Mistral-7B-Instruct-v0.3 function calling with the Hugging Face Inference API, but it didn't work. Can anyone help me? Thanks.

Here is my code:
import requests
import json
# Hosted Inference API endpoint for the model. The pasted value had been
# rewritten into a percent-encoded "/static-proxy?url=..." link with a stray
# trailing "</a>"; this is the decoded URL without the markup residue.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
# Bearer-token authentication; API_TOKEN must be defined before this line runs.
headers = {"Authorization": f"Bearer {API_TOKEN}"}

def query_model(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON body.

    The request is sent with the module-level ``headers``. The response body
    is returned as parsed by ``response.json()`` regardless of HTTP status.
    """
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Tool definitions offered to the model (JSON-schema style parameters).
functions = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city and state, or zip code"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Unit of temperature"},
            },
            "required": ["location"],
        },
    }
]

prompt = "Get the current weather in San Francisco?"

# Sending the tools as `parameters.functions` had no visible effect: the model
# simply continued the raw prompt text. The tools are therefore embedded in
# the prompt itself, using the control-token layout that the Mistral
# tokenizer renders for a tool-enabled request:
#   [AVAILABLE_TOOLS] [<tools json>][/AVAILABLE_TOOLS][INST] <message>[/INST]
# NOTE(review): assumes the endpoint prepends <s> itself -- confirm.
tools = [{"type": "function", "function": fn} for fn in functions]
tool_prompt = f"[AVAILABLE_TOOLS] {json.dumps(tools)}[/AVAILABLE_TOOLS][INST] {prompt}[/INST]"

payload = {
    "inputs": tool_prompt,
    "parameters": {
        # Return only the completion, not the echoed prompt.
        "return_full_text": False,
    },
}

output = query_model(payload)

# The original code assumed the API returns a response like this:
# {
#     "function_call": {"name": "get_current_weather", "arguments": {"location": "San Francisco", "unit": "fahrenheit"}}
# }
# The text-generation endpoint actually returns a list such as
# [{"generated_text": "..."}], so a tool call has to be parsed out of the
# generated text, where it appears as a JSON list:
#   [TOOL_CALLS] [{"name": "...", "arguments": {...}}]
tool_calls = []
if isinstance(output, dict) and "function_call" in output:
    # Keep supporting the originally assumed response shape.
    tool_calls = [output["function_call"]]
elif isinstance(output, list) and output and isinstance(output[0], dict):
    generated_text = output[0].get("generated_text", "")
    # Look only at what follows the last [TOOL_CALLS] marker; when the marker
    # is absent, rpartition leaves the whole text as the candidate.
    _, _, candidate = generated_text.rpartition("[TOOL_CALLS]")
    try:
        # raw_decode parses the first JSON value and tolerates trailing text
        # (the model may keep talking after the call).
        parsed, _ = json.JSONDecoder().raw_decode(candidate.lstrip())
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, list):
        tool_calls = [call for call in parsed if isinstance(call, dict) and "name" in call]

if tool_calls:
    for call in tool_calls:
        function_name = call["name"]
        arguments = call.get("arguments", {})
        # Call your function based on the name and arguments
        if function_name == "get_current_weather":
            result = get_current_weather(**arguments)  # Unpack arguments into function call
            print("function call")
            print(result)
        # Handle other functions...
else:
    print("no function call")
    print(output)  # Handle the case where the model doesn't suggest a function call

Here is the result:
no function call
[{'generated_text': 'Get the current weather in San Francisco?\n\nYou can use the OpenWeatherMap API to get the current weather in San Francisco. Here's a simple example using Python:\n\n```python\nimport requests\n\napi_key = "your_api_key"\nbase_url = "
http://api.openweathermap.org/data/2.5/weather?"\ncity = "San Francisco"\n\ncomplete_url = base_url + "appid=" + api'}]

Sign up or log in to comment