machineuser committed on
Commit
ac33c34
·
1 Parent(s): 507971e

Sync widgets demo

Browse files
packages/jinja/test/e2e.test.js CHANGED
@@ -192,7 +192,7 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({
192
  },
193
  target: `<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
194
  },
195
- "mistralai/Mistral-7B-Instruct-v0.1": {
196
  chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
197
  data: {
198
  messages: EXAMPLE_CHAT,
@@ -440,7 +440,7 @@ describe("End-to-end tests", () => {
440
  });
441
 
442
  it("should parse a chat template from the Hugging Face Hub", async () => {
443
- const repo = "mistralai/Mistral-7B-Instruct-v0.1";
444
  const tokenizerConfig = await (
445
  await downloadFile({
446
  repo,
 
192
  },
193
  target: `<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
194
  },
195
+ "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ": {
196
  chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
197
  data: {
198
  messages: EXAMPLE_CHAT,
 
440
  });
441
 
442
  it("should parse a chat template from the Hugging Face Hub", async () => {
443
+ const repo = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ";
444
  const tokenizerConfig = await (
445
  await downloadFile({
446
  repo,
packages/tasks/package.json CHANGED
@@ -27,7 +27,8 @@
27
  "build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen",
28
  "prepare": "pnpm run build",
29
  "check": "tsc",
30
- "inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts"
 
31
  },
32
  "type": "module",
33
  "files": [
@@ -44,6 +45,10 @@
44
  "license": "MIT",
45
  "devDependencies": {
46
  "@types/node": "^20.11.5",
47
- "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz"
 
 
 
 
48
  }
49
  }
 
27
  "build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen",
28
  "prepare": "pnpm run build",
29
  "check": "tsc",
30
+ "inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
31
+ "inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json"
32
  },
33
  "type": "module",
34
  "files": [
 
45
  "license": "MIT",
46
  "devDependencies": {
47
  "@types/node": "^20.11.5",
48
+ "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz",
49
+ "type-fest": "^3.13.1"
50
+ },
51
+ "dependencies": {
52
+ "node-fetch": "^3.3.2"
53
  }
54
  }
packages/tasks/pnpm-lock.yaml CHANGED
@@ -4,6 +4,11 @@ settings:
4
  autoInstallPeers: true
5
  excludeLinksFromLockfile: false
6
 
 
 
 
 
 
7
  devDependencies:
8
  '@types/node':
9
  specifier: ^20.11.5
@@ -11,6 +16,9 @@ devDependencies:
11
  quicktype-core:
12
  specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz
13
  version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz'
 
 
 
14
 
15
  packages:
16
 
@@ -62,6 +70,11 @@ packages:
62
  - encoding
63
  dev: true
64
 
 
 
 
 
 
65
66
  resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
67
  engines: {node: '>=6'}
@@ -72,6 +85,21 @@ packages:
72
  engines: {node: '>=0.8.x'}
73
  dev: true
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
76
  resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
77
  dev: true
@@ -88,6 +116,11 @@ packages:
88
  resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
89
  dev: true
90
 
 
 
 
 
 
91
92
  resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
93
  engines: {node: 4.x || >=6.0.0}
@@ -100,6 +133,15 @@ packages:
100
  whatwg-url: 5.0.0
101
  dev: true
102
 
 
 
 
 
 
 
 
 
 
103
104
  resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
105
  dev: true
@@ -147,6 +189,11 @@ packages:
147
  resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
148
  dev: true
149
 
 
 
 
 
 
150
151
  resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
152
  dev: true
@@ -169,6 +216,11 @@ packages:
169
  resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
170
  dev: true
171
 
 
 
 
 
 
172
173
  resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
174
  dev: true
 
4
  autoInstallPeers: true
5
  excludeLinksFromLockfile: false
6
 
7
+ dependencies:
8
+ node-fetch:
9
+ specifier: ^3.3.2
10
+ version: 3.3.2
11
+
12
  devDependencies:
13
  '@types/node':
14
  specifier: ^20.11.5
 
16
  quicktype-core:
17
  specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz
18
  version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz'
19
+ type-fest:
20
+ specifier: ^3.13.1
21
+ version: 3.13.1
22
 
23
  packages:
24
 
 
70
  - encoding
71
  dev: true
72
 
73
74
+ resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==}
75
+ engines: {node: '>= 12'}
76
+ dev: false
77
+
78
79
  resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
80
  engines: {node: '>=6'}
 
85
  engines: {node: '>=0.8.x'}
86
  dev: true
87
 
88
89
+ resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==}
90
+ engines: {node: ^12.20 || >= 14.13}
91
+ dependencies:
92
+ node-domexception: 1.0.0
93
+ web-streams-polyfill: 3.3.3
94
+ dev: false
95
+
96
97
+ resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==}
98
+ engines: {node: '>=12.20.0'}
99
+ dependencies:
100
+ fetch-blob: 3.2.0
101
+ dev: false
102
+
103
104
  resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
105
  dev: true
 
116
  resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
117
  dev: true
118
 
119
120
+ resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
121
+ engines: {node: '>=10.5.0'}
122
+ dev: false
123
+
124
125
  resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
126
  engines: {node: 4.x || >=6.0.0}
 
133
  whatwg-url: 5.0.0
134
  dev: true
135
 
136
137
+ resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==}
138
+ engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
139
+ dependencies:
140
+ data-uri-to-buffer: 4.0.1
141
+ fetch-blob: 3.2.0
142
+ formdata-polyfill: 4.0.10
143
+ dev: false
144
+
145
146
  resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
147
  dev: true
 
189
  resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
190
  dev: true
191
 
192
193
+ resolution: {integrity: sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==}
194
+ engines: {node: '>=14.16'}
195
+ dev: true
196
+
197
198
  resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
199
  dev: true
 
216
  resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
217
  dev: true
218
 
219
220
+ resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==}
221
+ engines: {node: '>= 8'}
222
+ dev: false
223
+
224
225
  resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
226
  dev: true
packages/tasks/scripts/inference-codegen.ts CHANGED
@@ -57,7 +57,7 @@ async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles:
57
  if (taskId === "text-generation" || taskId === "chat-completion") {
58
  await schema.addSource({
59
  name: `${taskId}-stream-output`,
60
- schema: await fs.readFile(`${taskSpecDir}/output_stream.json`, { encoding: "utf-8" }),
61
  });
62
  }
63
  const inputData = new InputData();
 
57
  if (taskId === "text-generation" || taskId === "chat-completion") {
58
  await schema.addSource({
59
  name: `${taskId}-stream-output`,
60
+ schema: await fs.readFile(`${taskSpecDir}/stream_output.json`, { encoding: "utf-8" }),
61
  });
62
  }
63
  const inputData = new InputData();
packages/tasks/scripts/inference-tgi-import.ts ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Fetches the TGI specs and generates the JSON schemas for the input, output and stream_output of
3
+ * text-generation and chat-completion tasks.
4
+ * See https://huggingface.github.io/text-generation-inference/
5
+ */
6
+ import fs from "fs/promises";
7
+ import fetch from "node-fetch";
8
+ import * as path from "node:path/posix";
9
+ import { existsSync as pathExists } from "node:fs";
10
+ import type { JsonObject, JsonValue } from "type-fest";
11
+
12
// Address of the published TGI OpenAPI document that this script downloads.
// NOTE: within this module the name shadows the global WHATWG `URL` class.
+ const URL = "https://huggingface.github.io/text-generation-inference/openapi.json";
13
+
14
/**
 * Locates the package root by walking up from the directory of this script
 * until a directory containing a `package.json` is found.
 *
 * @returns the absolute path of the nearest ancestor directory that holds a
 *          `package.json`, or "/" when the walk reaches the filesystem root
 *          without finding one.
 */
const rootDirFinder = function () {
	// `import.meta.url` is a `file://` URL, not a filesystem path. Feeding it to
	// `path.normalize` directly yields a bogus relative path ("file:/...") whose
	// parent chain degrades to ".", "..", "../.." and never reaches "/".
	// Convert it to a real path first. `globalThis.URL` is used because this
	// module declares its own `URL` string constant.
	const scriptPath = decodeURIComponent(new globalThis.URL(import.meta.url).pathname);
	let currentPath = path.dirname(path.normalize(scriptPath));

	while (currentPath !== "/") {
		if (pathExists(path.join(currentPath, "package.json"))) {
			return currentPath;
		}

		// dirname("/x") is "/", so the loop always terminates.
		currentPath = path.dirname(currentPath);
	}

	return "/";
};

// Package root and the folder holding one sub-folder per task.
const rootDir = rootDirFinder();
const tasksDir = path.join(rootDir, "src", "tasks");
30
+
31
/**
 * Upper-cases the first character of every `-` or `_` separated segment of
 * `str` and joins the segments back together with `joiner`.
 *
 * e.g. "text-generation" gives "TextGeneration", and with joiner " " it
 * gives "Text Generation".
 *
 * @param str    dash/underscore separated identifier
 * @param joiner string inserted between the capitalized segments (default: none)
 * @returns the capitalized segments joined by `joiner`
 */
function toCamelCase(str: string, joiner = "") {
	const capitalized: string[] = [];
	for (const segment of str.split(/[-_]/)) {
		const head = segment.charAt(0).toUpperCase();
		const tail = segment.slice(1);
		capitalized.push(head + tail);
	}
	return capitalized.join(joiner);
}
37
+
38
/**
 * Extracts one component of the TGI OpenAPI document, together with every
 * component it references, and writes it as a standalone JSON schema to
 * `<tasksDir>/<task>/spec/<type>.json`.
 *
 * The OpenAPI document is downloaded on every call. The components are
 * mutated in place below (titles renamed, `$ref`s rewritten), so a parsed
 * document reused across calls would carry the previous call's rewritten
 * references.
 *
 * @param task              task folder name, e.g. "text-generation"
 * @param mainComponentName key of the component under `components.schemas` to export
 * @param type              which payload is exported; also used as the output file name
 * @throws Error when the download fails, when the document has no
 *         `components.schemas`, when `mainComponentName` is not part of it, or
 *         when a `$ref` points to a component that does not exist
 */
async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") {
	console.debug(`✨ Importing`, task, type);

	console.debug(" 📥 Fetching TGI specs");
	const response = await fetch(URL);
	if (!response.ok) {
		// Fail early instead of trying to parse an error page as the spec.
		throw new Error(`Failed to fetch TGI specs from ${URL}: ${response.status} ${response.statusText}`);
	}
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const openapi = (await response.json()) as any;
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const components: Record<string, any> | undefined = openapi?.["components"]?.["schemas"];
	if (!components) {
		throw new Error(`No "components.schemas" section found in TGI specs fetched from ${URL}`);
	}

	// e.g. TextGeneration
	const camelName = toCamelCase(task);
	// e.g. TextGenerationInput
	const camelFullName = camelName + toCamelCase(type);
	const mainComponent = components[mainComponentName];
	if (!mainComponent) {
		// Without this guard the failure would be an opaque TypeError further down.
		throw new Error(`Main component not found in components: ${mainComponentName}`);
	}
	// Components reachable from the main one, keyed by their new (prefixed) name.
	const filteredComponents: Record<string, JsonObject> = {};

	/**
	 * Recursively visits `data`, rewriting every `$ref` to the `#/$defs/<newName>`
	 * form and collecting the referenced components into `filteredComponents`.
	 */
	function _scan(data: JsonValue) {
		if (Array.isArray(data) || data instanceof Array) {
			for (const item of data) {
				_scan(item);
			}
		} else if (data && typeof data === "object") {
			for (const key of Object.keys(data)) {
				if (key === "$ref" && typeof data[key] === "string") {
					// Verify reference exists
					const ref = (data[key] as string).split("/").pop() ?? "";
					if (!components[ref]) {
						throw new Error(`Reference not found in components: ${data[key]}`);
					}

					// Add reference to components to export (and scan it too).
					// Registering it before the recursive scan keeps self-referencing
					// components from being scanned again.
					const newRef = camelFullName + ref.replace(camelName, "");
					if (!filteredComponents[newRef]) {
						components[ref]["title"] = newRef; // Rename title to avoid conflicts
						filteredComponents[newRef] = components[ref];
						_scan(components[ref]);
					}

					// Updating the reference to new format
					data[key] = `#/$defs/${newRef}`;
				} else {
					_scan(data[key]);
				}
			}
		}
	}

	console.debug(" 📦 Packaging jsonschema");
	_scan(mainComponent);

	const prettyName = toCamelCase(task, " ") + " " + toCamelCase(type, " ");
	const schema = {
		$id: `/inference/schemas/${task}/${type}.json`,
		$schema: "http://json-schema.org/draft-06/schema#",
		description:
			prettyName +
			".\n\nAuto-generated from TGI specs." +
			"\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
		title: camelFullName,
		type: "object",
		required: mainComponent["required"],
		properties: mainComponent["properties"],
		$defs: filteredComponents,
	};

	const specPath = path.join(tasksDir, task, "spec", `${type}.json`);
	console.debug(" 📂 Exporting", specPath);
	await fs.writeFile(specPath, JSON.stringify(schema, null, 4));
}
108
+
109
// Every schema to export: [task, OpenAPI component name, payload type].
// They are processed strictly one after the other, in this order.
const schemaExports = [
	["text-generation", "CompatGenerateRequest", "input"],
	["text-generation", "GenerateResponse", "output"],
	["text-generation", "StreamResponse", "stream_output"],
	["chat-completion", "ChatRequest", "input"],
	["chat-completion", "ChatCompletion", "output"],
	["chat-completion", "ChatCompletionChunk", "stream_output"],
] as const;

for (const [task, mainComponentName, type] of schemaExports) {
	await _extractAndAdapt(task, mainComponentName, type);
}
console.debug("✅ All done!");
packages/tasks/src/tasks/chat-completion/inference.ts CHANGED
@@ -5,154 +5,273 @@
5
  */
6
 
7
  /**
8
- * Inputs for ChatCompletion inference
 
 
 
 
9
  */
10
  export interface ChatCompletionInput {
11
  /**
12
  * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
13
- * frequency in the text so far, decreasing the model's likelihood to repeat the same line
14
- * verbatim.
15
  */
16
  frequency_penalty?: number;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  /**
18
  * The maximum number of tokens that can be generated in the chat completion.
19
  */
20
  max_tokens?: number;
 
 
 
21
  messages: ChatCompletionInputMessage[];
22
  /**
23
- * The random sampling seed.
 
24
  */
25
- seed?: number;
 
 
 
 
 
 
 
26
  /**
27
- * Stop generating tokens if a stop token is generated.
 
 
28
  */
29
- stop?: ChatCompletionInputStopReason;
 
30
  /**
31
- * If set, partial message deltas will be sent.
32
  */
 
33
  stream?: boolean;
34
  /**
35
- * The value used to modulate the logits distribution.
 
 
 
 
36
  */
37
  temperature?: number;
 
 
 
 
 
38
  /**
39
- * If set to < 1, only the smallest set of most probable tokens with probabilities that add
40
- * up to `top_p` or higher are kept for generation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  */
42
  top_p?: number;
43
  [property: string]: unknown;
44
  }
45
 
46
  export interface ChatCompletionInputMessage {
47
- /**
48
- * The content of the message.
49
- */
50
- content: string;
51
- role: ChatCompletionMessageRole;
52
  [property: string]: unknown;
53
  }
54
 
55
- /**
56
- * The role of the message author.
57
- */
58
- export type ChatCompletionMessageRole = "assistant" | "system" | "user";
 
 
59
 
60
- /**
61
- * Stop generating tokens if a stop token is generated.
62
- */
63
- export type ChatCompletionInputStopReason = string[] | string;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  /**
66
- * Outputs for Chat Completion inference
 
 
 
 
67
  */
68
  export interface ChatCompletionOutput {
69
- /**
70
- * A list of chat completion choices.
71
- */
72
- choices: ChatCompletionOutputChoice[];
73
- /**
74
- * The Unix timestamp (in seconds) of when the chat completion was created.
75
- */
76
  created: number;
 
 
 
 
 
77
  [property: string]: unknown;
78
  }
79
 
80
- export interface ChatCompletionOutputChoice {
81
- /**
82
- * The reason why the generation was stopped.
83
- */
84
- finish_reason: ChatCompletionFinishReason;
85
- /**
86
- * The index of the choice in the list of choices.
87
- */
88
  index: number;
89
- message: ChatCompletionOutputChoiceMessage;
 
90
  [property: string]: unknown;
91
  }
92
 
93
- /**
94
- * The reason why the generation was stopped.
95
- *
96
- * The generated sequence reached the maximum allowed length
97
- *
98
- * The model generated an end-of-sentence (EOS) token
99
- *
100
- * One of the sequence in stop_sequences was generated
101
- */
102
- export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
103
 
104
- export interface ChatCompletionOutputChoiceMessage {
105
- /**
106
- * The content of the chat completion message.
107
- */
108
- content: string;
109
- role: ChatCompletionMessageRole;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  [property: string]: unknown;
111
  }
112
 
113
  /**
114
- * Chat Completion Stream Output
 
 
 
 
115
  */
116
  export interface ChatCompletionStreamOutput {
117
- /**
118
- * A list of chat completion choices.
119
- */
120
  choices: ChatCompletionStreamOutputChoice[];
121
- /**
122
- * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
123
- * the same timestamp.
124
- */
125
  created: number;
 
 
 
 
126
  [property: string]: unknown;
127
  }
128
 
129
  export interface ChatCompletionStreamOutputChoice {
130
- /**
131
- * A chat completion delta generated by streamed model responses.
132
- */
133
  delta: ChatCompletionStreamOutputDelta;
134
- /**
135
- * The reason why the generation was stopped.
136
- */
137
- finish_reason?: ChatCompletionFinishReason;
138
- /**
139
- * The index of the choice in the list of choices.
140
- */
141
  index: number;
 
142
  [property: string]: unknown;
143
  }
144
 
145
- /**
146
- * A chat completion delta generated by streamed model responses.
147
- */
148
  export interface ChatCompletionStreamOutputDelta {
149
- /**
150
- * The contents of the chunk message.
151
- */
152
  content?: string;
153
- /**
154
- * The role of the author of this message.
155
- */
156
- role?: string;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  [property: string]: unknown;
158
  }
 
5
  */
6
 
7
  /**
8
+ * Chat Completion Input.
9
+ *
10
+ * Auto-generated from TGI specs.
11
+ * For more details, check out
12
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
13
  */
14
  export interface ChatCompletionInput {
15
  /**
16
  * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
17
+ * frequency in the text so far,
18
+ * decreasing the model's likelihood to repeat the same line verbatim.
19
  */
20
  frequency_penalty?: number;
21
+ /**
22
+ * UNUSED
23
+ * Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
24
+ * object that maps tokens
25
+ * (specified by their token ID in the tokenizer) to an associated bias value from -100 to
26
+ * 100. Mathematically,
27
+ * the bias is added to the logits generated by the model prior to sampling. The exact
28
+ * effect will vary per model,
29
+ * but values between -1 and 1 should decrease or increase likelihood of selection; values
30
+ * like -100 or 100 should
31
+ * result in a ban or exclusive selection of the relevant token.
32
+ */
33
+ logit_bias?: number[];
34
+ /**
35
+ * Whether to return log probabilities of the output tokens or not. If true, returns the log
36
+ * probabilities of each
37
+ * output token returned in the content of message.
38
+ */
39
+ logprobs?: boolean;
40
  /**
41
  * The maximum number of tokens that can be generated in the chat completion.
42
  */
43
  max_tokens?: number;
44
+ /**
45
+ * A list of messages comprising the conversation so far.
46
+ */
47
  messages: ChatCompletionInputMessage[];
48
  /**
49
+ * [UNUSED] ID of the model to use. See the model endpoint compatibility table for details
50
+ * on which models work with the Chat API.
51
  */
52
+ model: string;
53
+ /**
54
+ * UNUSED
55
+ * How many chat completion choices to generate for each input message. Note that you will
56
+ * be charged based on the
57
+ * number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
58
+ */
59
+ n?: number;
60
  /**
61
+ * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
62
+ * appear in the text so far,
63
+ * increasing the model's likelihood to talk about new topics
64
  */
65
+ presence_penalty?: number;
66
+ seed?: number;
67
  /**
68
+ * Up to 4 sequences where the API will stop generating further tokens.
69
  */
70
+ stop?: string[];
71
  stream?: boolean;
72
  /**
73
+ * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
74
+ * output more random, while
75
+ * lower values like 0.2 will make it more focused and deterministic.
76
+ *
77
+ * We generally recommend altering this or `top_p` but not both.
78
  */
79
  temperature?: number;
80
+ tool_choice?: ChatCompletionInputToolType;
81
+ /**
82
+ * A prompt to be appended before the tools
83
+ */
84
+ tool_prompt?: string;
85
  /**
86
+ * A list of tools the model may call. Currently, only functions are supported as a tool.
87
+ * Use this to provide a list of
88
+ * functions the model may generate JSON inputs for.
89
+ */
90
+ tools?: ChatCompletionInputTool[];
91
+ /**
92
+ * An integer between 0 and 5 specifying the number of most likely tokens to return at each
93
+ * token position, each with
94
+ * an associated log probability. logprobs must be set to true if this parameter is used.
95
+ */
96
+ top_logprobs?: number;
97
+ /**
98
+ * An alternative to sampling with temperature, called nucleus sampling, where the model
99
+ * considers the results of the
100
+ * tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%
101
+ * probability mass are considered.
102
  */
103
  top_p?: number;
104
  [property: string]: unknown;
105
  }
106
 
107
  export interface ChatCompletionInputMessage {
108
+ content?: string;
109
+ name?: string;
110
+ role: string;
111
+ tool_calls?: ChatCompletionInputToolCall[];
 
112
  [property: string]: unknown;
113
  }
114
 
115
+ export interface ChatCompletionInputToolCall {
116
+ function: ChatCompletionInputFunctionDefinition;
117
+ id: number;
118
+ type: string;
119
+ [property: string]: unknown;
120
+ }
121
 
122
+ export interface ChatCompletionInputFunctionDefinition {
123
+ arguments: unknown;
124
+ description?: string;
125
+ name: string;
126
+ [property: string]: unknown;
127
+ }
128
+
129
+ export type ChatCompletionInputToolType = "OneOf" | ChatCompletionInputToolTypeObject;
130
+
131
+ export interface ChatCompletionInputToolTypeObject {
132
+ FunctionName: string;
133
+ [property: string]: unknown;
134
+ }
135
+
136
+ export interface ChatCompletionInputTool {
137
+ function: ChatCompletionInputFunctionDefinition;
138
+ type: string;
139
+ [property: string]: unknown;
140
+ }
141
 
142
  /**
143
+ * Chat Completion Output.
144
+ *
145
+ * Auto-generated from TGI specs.
146
+ * For more details, check out
147
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
148
  */
149
  export interface ChatCompletionOutput {
150
+ choices: ChatCompletionOutputComplete[];
 
 
 
 
 
 
151
  created: number;
152
+ id: string;
153
+ model: string;
154
+ object: string;
155
+ system_fingerprint: string;
156
+ usage: ChatCompletionOutputUsage;
157
  [property: string]: unknown;
158
  }
159
 
160
+ export interface ChatCompletionOutputComplete {
161
+ finish_reason: string;
 
 
 
 
 
 
162
  index: number;
163
+ logprobs?: ChatCompletionOutputLogprobs;
164
+ message: ChatCompletionOutputMessage;
165
  [property: string]: unknown;
166
  }
167
 
168
+ export interface ChatCompletionOutputLogprobs {
169
+ content: ChatCompletionOutputLogprob[];
170
+ [property: string]: unknown;
171
+ }
 
 
 
 
 
 
172
 
173
+ export interface ChatCompletionOutputLogprob {
174
+ logprob: number;
175
+ token: string;
176
+ top_logprobs: ChatCompletionOutputTopLogprob[];
177
+ [property: string]: unknown;
178
+ }
179
+
180
+ export interface ChatCompletionOutputTopLogprob {
181
+ logprob: number;
182
+ token: string;
183
+ [property: string]: unknown;
184
+ }
185
+
186
+ export interface ChatCompletionOutputMessage {
187
+ content?: string;
188
+ name?: string;
189
+ role: string;
190
+ tool_calls?: ChatCompletionOutputToolCall[];
191
+ [property: string]: unknown;
192
+ }
193
+
194
+ export interface ChatCompletionOutputToolCall {
195
+ function: ChatCompletionOutputFunctionDefinition;
196
+ id: number;
197
+ type: string;
198
+ [property: string]: unknown;
199
+ }
200
+
201
+ export interface ChatCompletionOutputFunctionDefinition {
202
+ arguments: unknown;
203
+ description?: string;
204
+ name: string;
205
+ [property: string]: unknown;
206
+ }
207
+
208
+ export interface ChatCompletionOutputUsage {
209
+ completion_tokens: number;
210
+ prompt_tokens: number;
211
+ total_tokens: number;
212
  [property: string]: unknown;
213
  }
214
 
215
  /**
216
+ * Chat Completion Stream Output.
217
+ *
218
+ * Auto-generated from TGI specs.
219
+ * For more details, check out
220
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
221
  */
222
  export interface ChatCompletionStreamOutput {
 
 
 
223
  choices: ChatCompletionStreamOutputChoice[];
 
 
 
 
224
  created: number;
225
+ id: string;
226
+ model: string;
227
+ object: string;
228
+ system_fingerprint: string;
229
  [property: string]: unknown;
230
  }
231
 
232
  export interface ChatCompletionStreamOutputChoice {
 
 
 
233
  delta: ChatCompletionStreamOutputDelta;
234
+ finish_reason?: string;
 
 
 
 
 
 
235
  index: number;
236
+ logprobs?: ChatCompletionStreamOutputLogprobs;
237
  [property: string]: unknown;
238
  }
239
 
 
 
 
240
  export interface ChatCompletionStreamOutputDelta {
 
 
 
241
  content?: string;
242
+ role: string;
243
+ tool_calls?: ChatCompletionStreamOutputDeltaToolCall;
244
+ [property: string]: unknown;
245
+ }
246
+
247
+ export interface ChatCompletionStreamOutputDeltaToolCall {
248
+ function: ChatCompletionStreamOutputFunction;
249
+ id: string;
250
+ index: number;
251
+ type: string;
252
+ [property: string]: unknown;
253
+ }
254
+
255
+ export interface ChatCompletionStreamOutputFunction {
256
+ arguments: string;
257
+ name?: string;
258
+ [property: string]: unknown;
259
+ }
260
+
261
+ export interface ChatCompletionStreamOutputLogprobs {
262
+ content: ChatCompletionStreamOutputLogprob[];
263
+ [property: string]: unknown;
264
+ }
265
+
266
+ export interface ChatCompletionStreamOutputLogprob {
267
+ logprob: number;
268
+ token: string;
269
+ top_logprobs: ChatCompletionStreamOutputTopLogprob[];
270
+ [property: string]: unknown;
271
+ }
272
+
273
+ export interface ChatCompletionStreamOutputTopLogprob {
274
+ logprob: number;
275
+ token: string;
276
  [property: string]: unknown;
277
  }
packages/tasks/src/tasks/chat-completion/spec/input.json CHANGED
@@ -1,63 +1,227 @@
1
  {
2
- "title": "ChatCompletionInput",
3
  "$id": "/inference/schemas/chat-completion/input.json",
4
  "$schema": "http://json-schema.org/draft-06/schema#",
5
- "description": "Inputs for ChatCompletion inference",
 
6
  "type": "object",
 
7
  "properties": {
8
- "messages": {
 
 
 
 
 
 
 
9
  "type": "array",
10
- "title": "ChatCompletionInputMessage",
11
  "items": {
12
- "type": "object",
13
- "properties": {
14
- "role": {
15
- "$ref": "#/definitions/Role"
16
- },
17
- "content": {
18
- "type": "string",
19
- "description": "The content of the message."
20
- }
21
- },
22
- "required": ["role", "content"]
23
- }
24
  },
25
- "frequency_penalty": {
26
- "type": "number",
27
- "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
 
 
28
  },
29
  "max_tokens": {
30
  "type": "integer",
31
- "description": "The maximum number of tokens that can be generated in the chat completion."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  },
33
  "seed": {
34
  "type": "integer",
35
- "description": "The random sampling seed."
 
 
 
36
  },
37
  "stop": {
38
- "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
39
- "title": "ChatCompletionInputStopReason",
40
- "description": "Stop generating tokens if a stop token is generated."
 
 
 
 
41
  },
42
  "stream": {
43
- "type": "boolean",
44
- "description": "If set, partial message deltas will be sent."
45
  },
46
  "temperature": {
47
  "type": "number",
48
- "description": "The value used to modulate the logits distribution."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  },
50
  "top_p": {
51
  "type": "number",
52
- "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 
 
 
53
  }
54
  },
55
- "required": ["messages"],
56
- "definitions": {
57
- "Role": {
58
- "oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }],
59
- "title": "ChatCompletionMessageRole",
60
- "description": "The role of the message author."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
 
1
  {
 
2
  "$id": "/inference/schemas/chat-completion/input.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Chat Completion Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
+ "title": "ChatCompletionInput",
6
  "type": "object",
7
+ "required": ["model", "messages"],
8
  "properties": {
9
+ "frequency_penalty": {
10
+ "type": "number",
11
+ "format": "float",
12
+ "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
13
+ "example": "1.0",
14
+ "nullable": true
15
+ },
16
+ "logit_bias": {
17
  "type": "array",
 
18
  "items": {
19
+ "type": "number",
20
+ "format": "float"
21
+ },
22
+ "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
23
+ "nullable": true
 
 
 
 
 
 
 
24
  },
25
+ "logprobs": {
26
+ "type": "boolean",
27
+ "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
28
+ "example": "false",
29
+ "nullable": true
30
  },
31
  "max_tokens": {
32
  "type": "integer",
33
+ "format": "int32",
34
+ "description": "The maximum number of tokens that can be generated in the chat completion.",
35
+ "example": "32",
36
+ "nullable": true,
37
+ "minimum": 0
38
+ },
39
+ "messages": {
40
+ "type": "array",
41
+ "items": {
42
+ "$ref": "#/$defs/ChatCompletionInputMessage"
43
+ },
44
+ "description": "A list of messages comprising the conversation so far.",
45
+ "example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]"
46
+ },
47
+ "model": {
48
+ "type": "string",
49
+ "description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
50
+ "example": "mistralai/Mistral-7B-Instruct-v0.2"
51
+ },
52
+ "n": {
53
+ "type": "integer",
54
+ "format": "int32",
55
+ "description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
56
+ "example": "2",
57
+ "nullable": true,
58
+ "minimum": 0
59
+ },
60
+ "presence_penalty": {
61
+ "type": "number",
62
+ "format": "float",
63
+ "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
64
+ "example": 0.1,
65
+ "nullable": true
66
  },
67
  "seed": {
68
  "type": "integer",
69
+ "format": "int64",
70
+ "example": 42,
71
+ "nullable": true,
72
+ "minimum": 0
73
  },
74
  "stop": {
75
+ "type": "array",
76
+ "items": {
77
+ "type": "string"
78
+ },
79
+ "description": "Up to 4 sequences where the API will stop generating further tokens.",
80
+ "example": "null",
81
+ "nullable": true
82
  },
83
  "stream": {
84
+ "type": "boolean"
 
85
  },
86
  "temperature": {
87
  "type": "number",
88
+ "format": "float",
89
+ "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
90
+ "example": 1,
91
+ "nullable": true
92
+ },
93
+ "tool_choice": {
94
+ "allOf": [
95
+ {
96
+ "$ref": "#/$defs/ChatCompletionInputToolType"
97
+ }
98
+ ],
99
+ "nullable": true
100
+ },
101
+ "tool_prompt": {
102
+ "type": "string",
103
+ "description": "A prompt to be appended before the tools",
104
+ "example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
105
+ "nullable": true
106
+ },
107
+ "tools": {
108
+ "type": "array",
109
+ "items": {
110
+ "$ref": "#/$defs/ChatCompletionInputTool"
111
+ },
112
+ "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.",
113
+ "example": "null",
114
+ "nullable": true
115
+ },
116
+ "top_logprobs": {
117
+ "type": "integer",
118
+ "format": "int32",
119
+ "description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
120
+ "example": "5",
121
+ "nullable": true,
122
+ "minimum": 0
123
  },
124
  "top_p": {
125
  "type": "number",
126
+ "format": "float",
127
+ "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
128
+ "example": 0.95,
129
+ "nullable": true
130
  }
131
  },
132
+ "$defs": {
133
+ "ChatCompletionInputMessage": {
134
+ "type": "object",
135
+ "required": ["role"],
136
+ "properties": {
137
+ "content": {
138
+ "type": "string",
139
+ "example": "My name is David and I",
140
+ "nullable": true
141
+ },
142
+ "name": {
143
+ "type": "string",
144
+ "example": "\"David\"",
145
+ "nullable": true
146
+ },
147
+ "role": {
148
+ "type": "string",
149
+ "example": "user"
150
+ },
151
+ "tool_calls": {
152
+ "type": "array",
153
+ "items": {
154
+ "$ref": "#/$defs/ChatCompletionInputToolCall"
155
+ },
156
+ "nullable": true
157
+ }
158
+ },
159
+ "title": "ChatCompletionInputMessage"
160
+ },
161
+ "ChatCompletionInputToolCall": {
162
+ "type": "object",
163
+ "required": ["id", "type", "function"],
164
+ "properties": {
165
+ "function": {
166
+ "$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
167
+ },
168
+ "id": {
169
+ "type": "integer",
170
+ "format": "int32",
171
+ "minimum": 0
172
+ },
173
+ "type": {
174
+ "type": "string"
175
+ }
176
+ },
177
+ "title": "ChatCompletionInputToolCall"
178
+ },
179
+ "ChatCompletionInputFunctionDefinition": {
180
+ "type": "object",
181
+ "required": ["name", "arguments"],
182
+ "properties": {
183
+ "arguments": {},
184
+ "description": {
185
+ "type": "string",
186
+ "nullable": true
187
+ },
188
+ "name": {
189
+ "type": "string"
190
+ }
191
+ },
192
+ "title": "ChatCompletionInputFunctionDefinition"
193
+ },
194
+ "ChatCompletionInputToolType": {
195
+ "oneOf": [
196
+ {
197
+ "type": "object",
198
+ "required": ["FunctionName"],
199
+ "properties": {
200
+ "FunctionName": {
201
+ "type": "string"
202
+ }
203
+ }
204
+ },
205
+ {
206
+ "type": "string",
207
+ "enum": ["OneOf"]
208
+ }
209
+ ],
210
+ "title": "ChatCompletionInputToolType"
211
+ },
212
+ "ChatCompletionInputTool": {
213
+ "type": "object",
214
+ "required": ["type", "function"],
215
+ "properties": {
216
+ "function": {
217
+ "$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
218
+ },
219
+ "type": {
220
+ "type": "string",
221
+ "example": "function"
222
+ }
223
+ },
224
+ "title": "ChatCompletionInputTool"
225
  }
226
  }
227
  }
packages/tasks/src/tasks/chat-completion/spec/output.json CHANGED
@@ -1,58 +1,196 @@
1
  {
2
  "$id": "/inference/schemas/chat-completion/output.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
- "description": "Outputs for Chat Completion inference",
5
  "title": "ChatCompletionOutput",
6
  "type": "object",
 
7
  "properties": {
8
  "choices": {
9
  "type": "array",
10
- "description": "A list of chat completion choices.",
11
- "title": "ChatCompletionOutputChoice",
12
  "items": {
13
- "type": "object",
14
- "properties": {
15
- "finish_reason": {
16
- "$ref": "#/definitions/FinishReason",
17
- "description": "The reason why the generation was stopped."
18
- },
19
- "index": {
20
- "type": "integer",
21
- "description": "The index of the choice in the list of choices."
22
- },
23
- "message": {
24
- "type": "object",
25
- "properties": {
26
- "role": {
27
- "$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
28
- },
29
- "content": {
30
- "type": "string",
31
- "description": "The content of the chat completion message."
32
- }
33
- },
34
- "title": "ChatCompletionOutputChoiceMessage",
35
- "required": ["content", "role"]
36
- }
37
- },
38
- "required": ["finish_reason", "index", "message"]
39
  }
40
  },
41
  "created": {
42
  "type": "integer",
43
- "description": "The Unix timestamp (in seconds) of when the chat completion was created."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  }
45
  },
46
- "required": ["choices", "created"],
47
- "definitions": {
48
- "FinishReason": {
49
- "type": "string",
50
- "title": "ChatCompletionFinishReason",
51
- "oneOf": [
52
- { "const": "length", "description": "The generated sequence reached the maximum allowed length" },
53
- { "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
54
- { "const": "stop_sequence", "description": "One of the sequence in stop_sequences was generated" }
55
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
57
  }
58
  }
 
1
  {
2
  "$id": "/inference/schemas/chat-completion/output.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
  "title": "ChatCompletionOutput",
6
  "type": "object",
7
+ "required": ["id", "object", "created", "model", "system_fingerprint", "choices", "usage"],
8
  "properties": {
9
  "choices": {
10
  "type": "array",
 
 
11
  "items": {
12
+ "$ref": "#/$defs/ChatCompletionOutputComplete"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
  },
15
  "created": {
16
  "type": "integer",
17
+ "format": "int64",
18
+ "example": "1706270835",
19
+ "minimum": 0
20
+ },
21
+ "id": {
22
+ "type": "string"
23
+ },
24
+ "model": {
25
+ "type": "string",
26
+ "example": "mistralai/Mistral-7B-Instruct-v0.2"
27
+ },
28
+ "object": {
29
+ "type": "string"
30
+ },
31
+ "system_fingerprint": {
32
+ "type": "string"
33
+ },
34
+ "usage": {
35
+ "$ref": "#/$defs/ChatCompletionOutputUsage"
36
  }
37
  },
38
+ "$defs": {
39
+ "ChatCompletionOutputComplete": {
40
+ "type": "object",
41
+ "required": ["index", "message", "finish_reason"],
42
+ "properties": {
43
+ "finish_reason": {
44
+ "type": "string"
45
+ },
46
+ "index": {
47
+ "type": "integer",
48
+ "format": "int32",
49
+ "minimum": 0
50
+ },
51
+ "logprobs": {
52
+ "allOf": [
53
+ {
54
+ "$ref": "#/$defs/ChatCompletionOutputLogprobs"
55
+ }
56
+ ],
57
+ "nullable": true
58
+ },
59
+ "message": {
60
+ "$ref": "#/$defs/ChatCompletionOutputMessage"
61
+ }
62
+ },
63
+ "title": "ChatCompletionOutputComplete"
64
+ },
65
+ "ChatCompletionOutputLogprobs": {
66
+ "type": "object",
67
+ "required": ["content"],
68
+ "properties": {
69
+ "content": {
70
+ "type": "array",
71
+ "items": {
72
+ "$ref": "#/$defs/ChatCompletionOutputLogprob"
73
+ }
74
+ }
75
+ },
76
+ "title": "ChatCompletionOutputLogprobs"
77
+ },
78
+ "ChatCompletionOutputLogprob": {
79
+ "type": "object",
80
+ "required": ["token", "logprob", "top_logprobs"],
81
+ "properties": {
82
+ "logprob": {
83
+ "type": "number",
84
+ "format": "float"
85
+ },
86
+ "token": {
87
+ "type": "string"
88
+ },
89
+ "top_logprobs": {
90
+ "type": "array",
91
+ "items": {
92
+ "$ref": "#/$defs/ChatCompletionOutputTopLogprob"
93
+ }
94
+ }
95
+ },
96
+ "title": "ChatCompletionOutputLogprob"
97
+ },
98
+ "ChatCompletionOutputTopLogprob": {
99
+ "type": "object",
100
+ "required": ["token", "logprob"],
101
+ "properties": {
102
+ "logprob": {
103
+ "type": "number",
104
+ "format": "float"
105
+ },
106
+ "token": {
107
+ "type": "string"
108
+ }
109
+ },
110
+ "title": "ChatCompletionOutputTopLogprob"
111
+ },
112
+ "ChatCompletionOutputMessage": {
113
+ "type": "object",
114
+ "required": ["role"],
115
+ "properties": {
116
+ "content": {
117
+ "type": "string",
118
+ "example": "My name is David and I",
119
+ "nullable": true
120
+ },
121
+ "name": {
122
+ "type": "string",
123
+ "example": "\"David\"",
124
+ "nullable": true
125
+ },
126
+ "role": {
127
+ "type": "string",
128
+ "example": "user"
129
+ },
130
+ "tool_calls": {
131
+ "type": "array",
132
+ "items": {
133
+ "$ref": "#/$defs/ChatCompletionOutputToolCall"
134
+ },
135
+ "nullable": true
136
+ }
137
+ },
138
+ "title": "ChatCompletionOutputMessage"
139
+ },
140
+ "ChatCompletionOutputToolCall": {
141
+ "type": "object",
142
+ "required": ["id", "type", "function"],
143
+ "properties": {
144
+ "function": {
145
+ "$ref": "#/$defs/ChatCompletionOutputFunctionDefinition"
146
+ },
147
+ "id": {
148
+ "type": "integer",
149
+ "format": "int32",
150
+ "minimum": 0
151
+ },
152
+ "type": {
153
+ "type": "string"
154
+ }
155
+ },
156
+ "title": "ChatCompletionOutputToolCall"
157
+ },
158
+ "ChatCompletionOutputFunctionDefinition": {
159
+ "type": "object",
160
+ "required": ["name", "arguments"],
161
+ "properties": {
162
+ "arguments": {},
163
+ "description": {
164
+ "type": "string",
165
+ "nullable": true
166
+ },
167
+ "name": {
168
+ "type": "string"
169
+ }
170
+ },
171
+ "title": "ChatCompletionOutputFunctionDefinition"
172
+ },
173
+ "ChatCompletionOutputUsage": {
174
+ "type": "object",
175
+ "required": ["prompt_tokens", "completion_tokens", "total_tokens"],
176
+ "properties": {
177
+ "completion_tokens": {
178
+ "type": "integer",
179
+ "format": "int32",
180
+ "minimum": 0
181
+ },
182
+ "prompt_tokens": {
183
+ "type": "integer",
184
+ "format": "int32",
185
+ "minimum": 0
186
+ },
187
+ "total_tokens": {
188
+ "type": "integer",
189
+ "format": "int32",
190
+ "minimum": 0
191
+ }
192
+ },
193
+ "title": "ChatCompletionOutputUsage"
194
  }
195
  }
196
  }
packages/tasks/src/tasks/chat-completion/spec/output_stream.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "$id": "/inference/schemas/chat-completion/output_stream.json",
3
- "$schema": "http://json-schema.org/draft-06/schema#",
4
- "description": "Chat Completion Stream Output",
5
- "title": "ChatCompletionStreamOutput",
6
- "type": "object",
7
- "properties": {
8
- "choices": {
9
- "type": "array",
10
- "title": "ChatCompletionStreamOutputChoice",
11
- "description": "A list of chat completion choices.",
12
- "items": {
13
- "type": "object",
14
- "properties": {
15
- "delta": {
16
- "type": "object",
17
- "title": "ChatCompletionStreamOutputDelta",
18
- "description": "A chat completion delta generated by streamed model responses.",
19
- "properties": {
20
- "content": {
21
- "type": "string",
22
- "description": "The contents of the chunk message."
23
- },
24
- "role": {
25
- "type": "string",
26
- "description": "The role of the author of this message."
27
- }
28
- }
29
- },
30
- "finish_reason": {
31
- "$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
32
- "description": "The reason why the generation was stopped."
33
- },
34
- "index": {
35
- "type": "integer",
36
- "description": "The index of the choice in the list of choices."
37
- }
38
- },
39
- "required": ["delta", "index"]
40
- }
41
- },
42
- "created": {
43
- "type": "integer",
44
- "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
45
- }
46
- },
47
- "required": ["choices", "created"]
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
packages/tasks/src/tasks/chat-completion/spec/stream_output.json ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$id": "/inference/schemas/chat-completion/stream_output.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
+ "title": "ChatCompletionStreamOutput",
6
+ "type": "object",
7
+ "required": ["id", "object", "created", "model", "system_fingerprint", "choices"],
8
+ "properties": {
9
+ "choices": {
10
+ "type": "array",
11
+ "items": {
12
+ "$ref": "#/$defs/ChatCompletionStreamOutputChoice"
13
+ }
14
+ },
15
+ "created": {
16
+ "type": "integer",
17
+ "format": "int64",
18
+ "example": "1706270978",
19
+ "minimum": 0
20
+ },
21
+ "id": {
22
+ "type": "string"
23
+ },
24
+ "model": {
25
+ "type": "string",
26
+ "example": "mistralai/Mistral-7B-Instruct-v0.2"
27
+ },
28
+ "object": {
29
+ "type": "string"
30
+ },
31
+ "system_fingerprint": {
32
+ "type": "string"
33
+ }
34
+ },
35
+ "$defs": {
36
+ "ChatCompletionStreamOutputChoice": {
37
+ "type": "object",
38
+ "required": ["index", "delta"],
39
+ "properties": {
40
+ "delta": {
41
+ "$ref": "#/$defs/ChatCompletionStreamOutputDelta"
42
+ },
43
+ "finish_reason": {
44
+ "type": "string",
45
+ "nullable": true
46
+ },
47
+ "index": {
48
+ "type": "integer",
49
+ "format": "int32",
50
+ "minimum": 0
51
+ },
52
+ "logprobs": {
53
+ "allOf": [
54
+ {
55
+ "$ref": "#/$defs/ChatCompletionStreamOutputLogprobs"
56
+ }
57
+ ],
58
+ "nullable": true
59
+ }
60
+ },
61
+ "title": "ChatCompletionStreamOutputChoice"
62
+ },
63
+ "ChatCompletionStreamOutputDelta": {
64
+ "type": "object",
65
+ "required": ["role"],
66
+ "properties": {
67
+ "content": {
68
+ "type": "string",
69
+ "example": "What is Deep Learning?",
70
+ "nullable": true
71
+ },
72
+ "role": {
73
+ "type": "string",
74
+ "example": "user"
75
+ },
76
+ "tool_calls": {
77
+ "allOf": [
78
+ {
79
+ "$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
80
+ }
81
+ ],
82
+ "nullable": true
83
+ }
84
+ },
85
+ "title": "ChatCompletionStreamOutputDelta"
86
+ },
87
+ "ChatCompletionStreamOutputDeltaToolCall": {
88
+ "type": "object",
89
+ "required": ["index", "id", "type", "function"],
90
+ "properties": {
91
+ "function": {
92
+ "$ref": "#/$defs/ChatCompletionStreamOutputFunction"
93
+ },
94
+ "id": {
95
+ "type": "string"
96
+ },
97
+ "index": {
98
+ "type": "integer",
99
+ "format": "int32",
100
+ "minimum": 0
101
+ },
102
+ "type": {
103
+ "type": "string"
104
+ }
105
+ },
106
+ "title": "ChatCompletionStreamOutputDeltaToolCall"
107
+ },
108
+ "ChatCompletionStreamOutputFunction": {
109
+ "type": "object",
110
+ "required": ["arguments"],
111
+ "properties": {
112
+ "arguments": {
113
+ "type": "string"
114
+ },
115
+ "name": {
116
+ "type": "string",
117
+ "nullable": true
118
+ }
119
+ },
120
+ "title": "ChatCompletionStreamOutputFunction"
121
+ },
122
+ "ChatCompletionStreamOutputLogprobs": {
123
+ "type": "object",
124
+ "required": ["content"],
125
+ "properties": {
126
+ "content": {
127
+ "type": "array",
128
+ "items": {
129
+ "$ref": "#/$defs/ChatCompletionStreamOutputLogprob"
130
+ }
131
+ }
132
+ },
133
+ "title": "ChatCompletionStreamOutputLogprobs"
134
+ },
135
+ "ChatCompletionStreamOutputLogprob": {
136
+ "type": "object",
137
+ "required": ["token", "logprob", "top_logprobs"],
138
+ "properties": {
139
+ "logprob": {
140
+ "type": "number",
141
+ "format": "float"
142
+ },
143
+ "token": {
144
+ "type": "string"
145
+ },
146
+ "top_logprobs": {
147
+ "type": "array",
148
+ "items": {
149
+ "$ref": "#/$defs/ChatCompletionStreamOutputTopLogprob"
150
+ }
151
+ }
152
+ },
153
+ "title": "ChatCompletionStreamOutputLogprob"
154
+ },
155
+ "ChatCompletionStreamOutputTopLogprob": {
156
+ "type": "object",
157
+ "required": ["token", "logprob"],
158
+ "properties": {
159
+ "logprob": {
160
+ "type": "number",
161
+ "format": "float"
162
+ },
163
+ "token": {
164
+ "type": "string"
165
+ }
166
+ },
167
+ "title": "ChatCompletionStreamOutputTopLogprob"
168
+ }
169
+ }
170
+ }
packages/tasks/src/tasks/index.ts CHANGED
@@ -43,9 +43,8 @@ export type {
43
  ChatCompletionInput,
44
  ChatCompletionInputMessage,
45
  ChatCompletionOutput,
46
- ChatCompletionOutputChoice,
47
- ChatCompletionFinishReason,
48
- ChatCompletionOutputChoiceMessage,
49
  ChatCompletionStreamOutput,
50
  ChatCompletionStreamOutputChoice,
51
  ChatCompletionStreamOutputDelta,
@@ -85,15 +84,15 @@ export type {
85
  TextClassificationParameters,
86
  } from "./text-classification/inference";
87
  export type {
88
- TextGenerationFinishReason,
89
- TextGenerationPrefillToken,
90
  TextGenerationInput,
91
  TextGenerationOutput,
92
  TextGenerationOutputDetails,
93
- TextGenerationParameters,
94
- TextGenerationOutputSequenceDetails,
95
  TextGenerationOutputToken,
96
- TextGenerationStreamDetails,
97
  TextGenerationStreamOutput,
98
  } from "./text-generation/inference";
99
  export type * from "./video-classification/inference";
 
43
  ChatCompletionInput,
44
  ChatCompletionInputMessage,
45
  ChatCompletionOutput,
46
+ ChatCompletionOutputComplete,
47
+ ChatCompletionOutputMessage,
 
48
  ChatCompletionStreamOutput,
49
  ChatCompletionStreamOutputChoice,
50
  ChatCompletionStreamOutputDelta,
 
84
  TextClassificationParameters,
85
  } from "./text-classification/inference";
86
  export type {
87
+ TextGenerationOutputFinishReason,
88
+ TextGenerationOutputPrefillToken,
89
  TextGenerationInput,
90
  TextGenerationOutput,
91
  TextGenerationOutputDetails,
92
+ TextGenerationInputGenerateParameters,
93
+ TextGenerationOutputBestOfSequence,
94
  TextGenerationOutputToken,
95
+ TextGenerationStreamOutputStreamDetails,
96
  TextGenerationStreamOutput,
97
  } from "./text-generation/inference";
98
  export type * from "./video-classification/inference";
packages/tasks/src/tasks/text-generation/inference.ts CHANGED
@@ -5,246 +5,134 @@
5
  */
6
 
7
  /**
8
- * Inputs for Text Generation inference
 
 
 
 
9
  */
10
  export interface TextGenerationInput {
11
- /**
12
- * The text to initialize generation with
13
- */
14
  inputs: string;
15
- /**
16
- * Additional inference parameters
17
- */
18
- parameters?: TextGenerationParameters;
19
- /**
20
- * Whether to stream output tokens
21
- */
22
  stream?: boolean;
23
  [property: string]: unknown;
24
  }
25
 
26
- /**
27
- * Additional inference parameters
28
- *
29
- * Additional inference parameters for Text Generation
30
- */
31
- export interface TextGenerationParameters {
32
- /**
33
- * The number of sampling queries to run. Only the best one (in terms of total logprob) will
34
- * be returned.
35
- */
36
  best_of?: number;
37
- /**
38
- * Whether or not to output decoder input details
39
- */
40
  decoder_input_details?: boolean;
41
- /**
42
- * Whether or not to output details
43
- */
44
  details?: boolean;
45
- /**
46
- * Whether to use logits sampling instead of greedy decoding when generating new tokens.
47
- */
48
  do_sample?: boolean;
49
- /**
50
- * The maximum number of tokens to generate.
51
- */
52
  max_new_tokens?: number;
53
- /**
54
- * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
55
- * paper](https://hf.co/papers/1909.05858) for more details.
56
- */
57
  repetition_penalty?: number;
58
- /**
59
- * Whether to prepend the prompt to the generated text.
60
- */
61
  return_full_text?: boolean;
62
- /**
63
- * The random sampling seed.
64
- */
65
  seed?: number;
66
- /**
67
- * Stop generating tokens if a member of `stop_sequences` is generated.
68
- */
69
- stop_sequences?: string[];
70
- /**
71
- * The value used to modulate the logits distribution.
72
- */
73
  temperature?: number;
74
- /**
75
- * The number of highest probability vocabulary tokens to keep for top-k-filtering.
76
- */
77
  top_k?: number;
78
- /**
79
- * If set to < 1, only the smallest set of most probable tokens with probabilities that add
80
- * up to `top_p` or higher are kept for generation.
81
- */
82
  top_p?: number;
83
- /**
84
- * Truncate input tokens to the given size.
85
- */
86
  truncate?: number;
87
- /**
88
- * Typical Decoding mass. See [Typical Decoding for Natural Language
89
- * Generation](https://hf.co/papers/2202.00666) for more information
90
- */
91
  typical_p?: number;
 
 
 
 
 
 
92
  /**
93
- * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
 
 
 
94
  */
95
- watermark?: boolean;
96
  [property: string]: unknown;
97
  }
98
 
 
 
99
  /**
100
- * Outputs for Text Generation inference
 
 
 
 
101
  */
102
  export interface TextGenerationOutput {
103
- /**
104
- * When enabled, details about the generation
105
- */
106
  details?: TextGenerationOutputDetails;
107
- /**
108
- * The generated text
109
- */
110
  generated_text: string;
111
  [property: string]: unknown;
112
  }
113
 
114
- /**
115
- * When enabled, details about the generation
116
- */
117
  export interface TextGenerationOutputDetails {
118
- /**
119
- * Details about additional sequences when best_of is provided
120
- */
121
- best_of_sequences?: TextGenerationOutputSequenceDetails[];
122
- /**
123
- * The reason why the generation was stopped.
124
- */
125
- finish_reason: TextGenerationFinishReason;
126
- /**
127
- * The number of generated tokens
128
- */
129
  generated_tokens: number;
130
- prefill: TextGenerationPrefillToken[];
131
- /**
132
- * The random seed used for generation
133
- */
134
  seed?: number;
135
- /**
136
- * The generated tokens and associated details
137
- */
138
  tokens: TextGenerationOutputToken[];
139
- /**
140
- * Most likely tokens
141
- */
142
  top_tokens?: Array<TextGenerationOutputToken[]>;
143
  [property: string]: unknown;
144
  }
145
 
146
- export interface TextGenerationOutputSequenceDetails {
147
- finish_reason: TextGenerationFinishReason;
148
- /**
149
- * The generated text
150
- */
151
  generated_text: string;
152
- /**
153
- * The number of generated tokens
154
- */
155
  generated_tokens: number;
156
- prefill: TextGenerationPrefillToken[];
157
- /**
158
- * The random seed used for generation
159
- */
160
  seed?: number;
161
- /**
162
- * The generated tokens and associated details
163
- */
164
  tokens: TextGenerationOutputToken[];
165
- /**
166
- * Most likely tokens
167
- */
168
  top_tokens?: Array<TextGenerationOutputToken[]>;
169
  [property: string]: unknown;
170
  }
171
 
172
- /**
173
- * The reason why the generation was stopped.
174
- *
175
- * length: The generated sequence reached the maximum allowed length
176
- *
177
- * eos_token: The model generated an end-of-sentence (EOS) token
178
- *
179
- * stop_sequence: One of the sequence in stop_sequences was generated
180
- */
181
- export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
182
 
183
- export interface TextGenerationPrefillToken {
184
  id: number;
185
  logprob: number;
186
- /**
187
- * The text associated with that token
188
- */
189
  text: string;
190
  [property: string]: unknown;
191
  }
192
 
193
- /**
194
- * Generated token.
195
- */
196
  export interface TextGenerationOutputToken {
197
  id: number;
198
- logprob?: number;
199
- /**
200
- * Whether or not that token is a special one
201
- */
202
  special: boolean;
203
- /**
204
- * The text associated with that token
205
- */
206
  text: string;
207
  [property: string]: unknown;
208
  }
209
 
210
  /**
211
- * Text Generation Stream Output
 
 
 
 
212
  */
213
  export interface TextGenerationStreamOutput {
214
- /**
215
- * Generation details. Only available when the generation is finished.
216
- */
217
- details?: TextGenerationStreamDetails;
218
- /**
219
- * The complete generated text. Only available when the generation is finished.
220
- */
221
  generated_text?: string;
222
- /**
223
- * The token index within the stream. Optional to support older clients that omit it.
224
- */
225
- index?: number;
226
- /**
227
- * Generated token.
228
- */
229
- token: TextGenerationOutputToken;
230
  [property: string]: unknown;
231
  }
232
 
233
- /**
234
- * Generation details. Only available when the generation is finished.
235
- */
236
- export interface TextGenerationStreamDetails {
237
- /**
238
- * The reason why the generation was stopped.
239
- */
240
- finish_reason: TextGenerationFinishReason;
241
- /**
242
- * The number of generated tokens
243
- */
244
  generated_tokens: number;
245
- /**
246
- * The random seed used for generation
247
- */
248
- seed: number;
 
 
 
 
 
249
  [property: string]: unknown;
250
  }
 
5
  */
6
 
7
  /**
8
+ * Text Generation Input.
9
+ *
10
+ * Auto-generated from TGI specs.
11
+ * For more details, check out
12
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
13
  */
14
  export interface TextGenerationInput {
 
 
 
15
  inputs: string;
16
+ parameters?: TextGenerationInputGenerateParameters;
 
 
 
 
 
 
17
  stream?: boolean;
18
  [property: string]: unknown;
19
  }
20
 
21
+ export interface TextGenerationInputGenerateParameters {
 
 
 
 
 
 
 
 
 
22
  best_of?: number;
 
 
 
23
  decoder_input_details?: boolean;
 
 
 
24
  details?: boolean;
 
 
 
25
  do_sample?: boolean;
26
+ frequency_penalty?: number;
27
+ grammar?: TextGenerationInputGrammarType;
 
28
  max_new_tokens?: number;
 
 
 
 
29
  repetition_penalty?: number;
 
 
 
30
  return_full_text?: boolean;
 
 
 
31
  seed?: number;
32
+ stop?: string[];
 
 
 
 
 
 
33
  temperature?: number;
 
 
 
34
  top_k?: number;
35
+ top_n_tokens?: number;
 
 
 
36
  top_p?: number;
 
 
 
37
  truncate?: number;
 
 
 
 
38
  typical_p?: number;
39
+ watermark?: boolean;
40
+ [property: string]: unknown;
41
+ }
42
+
43
+ export interface TextGenerationInputGrammarType {
44
+ type: Type;
45
  /**
46
+ * A string that represents a [JSON Schema](https://json-schema.org/).
47
+ *
48
+ * JSON Schema is a declarative language that allows to annotate JSON documents
49
+ * with types and descriptions.
50
  */
51
+ value: unknown;
52
  [property: string]: unknown;
53
  }
54
 
55
+ export type Type = "json" | "regex";
56
+
57
  /**
58
+ * Text Generation Output.
59
+ *
60
+ * Auto-generated from TGI specs.
61
+ * For more details, check out
62
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
63
  */
64
  export interface TextGenerationOutput {
 
 
 
65
  details?: TextGenerationOutputDetails;
 
 
 
66
  generated_text: string;
67
  [property: string]: unknown;
68
  }
69
 
 
 
 
70
  export interface TextGenerationOutputDetails {
71
+ best_of_sequences?: TextGenerationOutputBestOfSequence[];
72
+ finish_reason: TextGenerationOutputFinishReason;
 
 
 
 
 
 
 
 
 
73
  generated_tokens: number;
74
+ prefill: TextGenerationOutputPrefillToken[];
 
 
 
75
  seed?: number;
 
 
 
76
  tokens: TextGenerationOutputToken[];
 
 
 
77
  top_tokens?: Array<TextGenerationOutputToken[]>;
78
  [property: string]: unknown;
79
  }
80
 
81
+ export interface TextGenerationOutputBestOfSequence {
82
+ finish_reason: TextGenerationOutputFinishReason;
 
 
 
83
  generated_text: string;
 
 
 
84
  generated_tokens: number;
85
+ prefill: TextGenerationOutputPrefillToken[];
 
 
 
86
  seed?: number;
 
 
 
87
  tokens: TextGenerationOutputToken[];
 
 
 
88
  top_tokens?: Array<TextGenerationOutputToken[]>;
89
  [property: string]: unknown;
90
  }
91
 
92
+ export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence";
 
 
 
 
 
 
 
 
 
93
 
94
+ export interface TextGenerationOutputPrefillToken {
95
  id: number;
96
  logprob: number;
 
 
 
97
  text: string;
98
  [property: string]: unknown;
99
  }
100
 
 
 
 
101
  export interface TextGenerationOutputToken {
102
  id: number;
103
+ logprob: number;
 
 
 
104
  special: boolean;
 
 
 
105
  text: string;
106
  [property: string]: unknown;
107
  }
108
 
109
  /**
110
+ * Text Generation Stream Output.
111
+ *
112
+ * Auto-generated from TGI specs.
113
+ * For more details, check out
114
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
115
  */
116
  export interface TextGenerationStreamOutput {
117
+ details?: TextGenerationStreamOutputStreamDetails;
 
 
 
 
 
 
118
  generated_text?: string;
119
+ index: number;
120
+ token: TextGenerationStreamOutputToken;
121
+ top_tokens?: TextGenerationStreamOutputToken[];
 
 
 
 
 
122
  [property: string]: unknown;
123
  }
124
 
125
+ export interface TextGenerationStreamOutputStreamDetails {
126
+ finish_reason: TextGenerationOutputFinishReason;
 
 
 
 
 
 
 
 
 
127
  generated_tokens: number;
128
+ seed?: number;
129
+ [property: string]: unknown;
130
+ }
131
+
132
+ export interface TextGenerationStreamOutputToken {
133
+ id: number;
134
+ logprob: number;
135
+ special: boolean;
136
+ text: string;
137
  [property: string]: unknown;
138
  }
packages/tasks/src/tasks/text-generation/spec/input.json CHANGED
@@ -1,94 +1,195 @@
1
  {
2
  "$id": "/inference/schemas/text-generation/input.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
- "description": "Inputs for Text Generation inference",
5
  "title": "TextGenerationInput",
6
  "type": "object",
 
7
  "properties": {
8
  "inputs": {
9
- "description": "The text to initialize generation with",
10
- "type": "string"
11
  },
12
  "parameters": {
13
- "description": "Additional inference parameters",
14
- "$ref": "#/$defs/TextGenerationParameters"
15
  },
16
  "stream": {
17
- "description": "Whether to stream output tokens",
18
- "type": "boolean"
19
  }
20
  },
21
  "$defs": {
22
- "TextGenerationParameters": {
23
- "title": "TextGenerationParameters",
24
- "description": "Additional inference parameters for Text Generation",
25
  "type": "object",
26
  "properties": {
27
  "best_of": {
28
  "type": "integer",
29
- "description": "The number of sampling queries to run. Only the best one (in terms of total logprob) will be returned."
 
 
 
 
30
  },
31
  "decoder_input_details": {
32
  "type": "boolean",
33
- "description": "Whether or not to output decoder input details"
34
  },
35
  "details": {
36
  "type": "boolean",
37
- "description": "Whether or not to output details"
38
  },
39
  "do_sample": {
40
  "type": "boolean",
41
- "description": "Whether to use logits sampling instead of greedy decoding when generating new tokens."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  "max_new_tokens": {
44
  "type": "integer",
45
- "description": "The maximum number of tokens to generate."
 
 
 
 
46
  },
47
  "repetition_penalty": {
48
  "type": "number",
49
- "description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details."
 
 
 
 
50
  },
51
  "return_full_text": {
52
  "type": "boolean",
53
- "description": "Whether to prepend the prompt to the generated text."
 
 
54
  },
55
  "seed": {
56
  "type": "integer",
57
- "description": "The random sampling seed."
 
 
 
 
 
58
  },
59
- "stop_sequences": {
60
  "type": "array",
61
  "items": {
62
  "type": "string"
63
  },
64
- "description": "Stop generating tokens if a member of `stop_sequences` is generated."
 
65
  },
66
  "temperature": {
67
  "type": "number",
68
- "description": "The value used to modulate the logits distribution."
 
 
 
 
69
  },
70
  "top_k": {
71
  "type": "integer",
72
- "description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  },
74
  "top_p": {
75
  "type": "number",
76
- "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 
 
 
 
 
77
  },
78
  "truncate": {
79
  "type": "integer",
80
- "description": "Truncate input tokens to the given size."
 
 
 
81
  },
82
  "typical_p": {
83
  "type": "number",
84
- "description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information"
 
 
 
 
 
85
  },
86
  "watermark": {
87
  "type": "boolean",
88
- "description": "Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)"
 
89
  }
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  }
92
- },
93
- "required": ["inputs"]
94
  }
 
1
  {
2
  "$id": "/inference/schemas/text-generation/input.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Text Generation Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
  "title": "TextGenerationInput",
6
  "type": "object",
7
+ "required": ["inputs"],
8
  "properties": {
9
  "inputs": {
10
+ "type": "string",
11
+ "example": "My name is Olivier and I"
12
  },
13
  "parameters": {
14
+ "$ref": "#/$defs/TextGenerationInputGenerateParameters"
 
15
  },
16
  "stream": {
17
+ "type": "boolean",
18
+ "default": "false"
19
  }
20
  },
21
  "$defs": {
22
+ "TextGenerationInputGenerateParameters": {
 
 
23
  "type": "object",
24
  "properties": {
25
  "best_of": {
26
  "type": "integer",
27
+ "default": "null",
28
+ "example": 1,
29
+ "nullable": true,
30
+ "minimum": 0,
31
+ "exclusiveMinimum": 0
32
  },
33
  "decoder_input_details": {
34
  "type": "boolean",
35
+ "default": "false"
36
  },
37
  "details": {
38
  "type": "boolean",
39
+ "default": "true"
40
  },
41
  "do_sample": {
42
  "type": "boolean",
43
+ "default": "false",
44
+ "example": true
45
+ },
46
+ "frequency_penalty": {
47
+ "type": "number",
48
+ "format": "float",
49
+ "default": "null",
50
+ "example": 0.1,
51
+ "nullable": true,
52
+ "exclusiveMinimum": -2
53
+ },
54
+ "grammar": {
55
+ "allOf": [
56
+ {
57
+ "$ref": "#/$defs/TextGenerationInputGrammarType"
58
+ }
59
+ ],
60
+ "default": "null",
61
+ "nullable": true
62
  },
63
  "max_new_tokens": {
64
  "type": "integer",
65
+ "format": "int32",
66
+ "default": "100",
67
+ "example": "20",
68
+ "nullable": true,
69
+ "minimum": 0
70
  },
71
  "repetition_penalty": {
72
  "type": "number",
73
+ "format": "float",
74
+ "default": "null",
75
+ "example": 1.03,
76
+ "nullable": true,
77
+ "exclusiveMinimum": 0
78
  },
79
  "return_full_text": {
80
  "type": "boolean",
81
+ "default": "null",
82
+ "example": false,
83
+ "nullable": true
84
  },
85
  "seed": {
86
  "type": "integer",
87
+ "format": "int64",
88
+ "default": "null",
89
+ "example": "null",
90
+ "nullable": true,
91
+ "minimum": 0,
92
+ "exclusiveMinimum": 0
93
  },
94
+ "stop": {
95
  "type": "array",
96
  "items": {
97
  "type": "string"
98
  },
99
+ "example": ["photographer"],
100
+ "maxItems": 4
101
  },
102
  "temperature": {
103
  "type": "number",
104
+ "format": "float",
105
+ "default": "null",
106
+ "example": 0.5,
107
+ "nullable": true,
108
+ "exclusiveMinimum": 0
109
  },
110
  "top_k": {
111
  "type": "integer",
112
+ "format": "int32",
113
+ "default": "null",
114
+ "example": 10,
115
+ "nullable": true,
116
+ "exclusiveMinimum": 0
117
+ },
118
+ "top_n_tokens": {
119
+ "type": "integer",
120
+ "format": "int32",
121
+ "default": "null",
122
+ "example": 5,
123
+ "nullable": true,
124
+ "minimum": 0,
125
+ "exclusiveMinimum": 0
126
  },
127
  "top_p": {
128
  "type": "number",
129
+ "format": "float",
130
+ "default": "null",
131
+ "example": 0.95,
132
+ "nullable": true,
133
+ "maximum": 1,
134
+ "exclusiveMinimum": 0
135
  },
136
  "truncate": {
137
  "type": "integer",
138
+ "default": "null",
139
+ "example": "null",
140
+ "nullable": true,
141
+ "minimum": 0
142
  },
143
  "typical_p": {
144
  "type": "number",
145
+ "format": "float",
146
+ "default": "null",
147
+ "example": 0.95,
148
+ "nullable": true,
149
+ "maximum": 1,
150
+ "exclusiveMinimum": 0
151
  },
152
  "watermark": {
153
  "type": "boolean",
154
+ "default": "false",
155
+ "example": true
156
  }
157
+ },
158
+ "title": "TextGenerationInputGenerateParameters"
159
+ },
160
+ "TextGenerationInputGrammarType": {
161
+ "oneOf": [
162
+ {
163
+ "type": "object",
164
+ "required": ["type", "value"],
165
+ "properties": {
166
+ "type": {
167
+ "type": "string",
168
+ "enum": ["json"]
169
+ },
170
+ "value": {
171
+ "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions."
172
+ }
173
+ }
174
+ },
175
+ {
176
+ "type": "object",
177
+ "required": ["type", "value"],
178
+ "properties": {
179
+ "type": {
180
+ "type": "string",
181
+ "enum": ["regex"]
182
+ },
183
+ "value": {
184
+ "type": "string"
185
+ }
186
+ }
187
+ }
188
+ ],
189
+ "discriminator": {
190
+ "propertyName": "type"
191
+ },
192
+ "title": "TextGenerationInputGrammarType"
193
  }
194
+ }
 
195
  }
packages/tasks/src/tasks/text-generation/spec/output.json CHANGED
@@ -1,165 +1,179 @@
1
  {
2
  "$id": "/inference/schemas/text-generation/output.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
- "description": "Outputs for Text Generation inference",
5
  "title": "TextGenerationOutput",
6
  "type": "object",
 
7
  "properties": {
 
 
 
 
 
 
 
 
8
  "generated_text": {
9
  "type": "string",
10
- "description": "The generated text"
11
- },
12
- "details": {
13
- "$ref": "#/$defs/Details",
14
- "description": "When enabled, details about the generation"
15
  }
16
  },
17
- "required": ["generated_text"],
18
  "$defs": {
19
- "FinishReason": {
20
- "type": "string",
21
- "title": "TextGenerationFinishReason",
22
- "description": "The reason why the generation was stopped.",
23
- "oneOf": [
24
- { "const": "length", "description": "length: The generated sequence reached the maximum allowed length" },
25
- { "const": "eos_token", "description": "eos_token: The model generated an end-of-sentence (EOS) token" },
26
- {
27
- "const": "stop_sequence",
28
- "description": "stop_sequence: One of the sequence in stop_sequences was generated"
29
- }
30
- ]
31
- },
32
- "PrefillToken": {
33
- "title": "TextGenerationPrefillToken",
34
  "type": "object",
 
35
  "properties": {
36
- "id": {
37
- "type": "integer"
38
- },
39
- "logprob": {
40
- "type": "number"
41
- },
42
- "text": {
43
- "type": "string",
44
- "description": "The text associated with that token"
45
- }
46
- },
47
- "required": ["id", "logprob", "text"]
48
- },
49
- "Token": {
50
- "type": "object",
51
- "title": "TextGenerationOutputToken",
52
- "properties": {
53
- "id": {
54
- "type": "integer"
55
- },
56
- "logprob": {
57
- "type": "number"
58
- },
59
- "special": {
60
- "type": "boolean",
61
- "description": "Whether or not that token is a special one"
62
  },
63
- "text": {
64
- "type": "string",
65
- "description": "The text associated with that token"
66
- }
67
- },
68
- "required": ["id", "special", "text"]
69
- },
70
- "Details": {
71
- "type": "object",
72
- "title": "TextGenerationOutputDetails",
73
- "properties": {
74
  "finish_reason": {
75
- "$ref": "#/$defs/FinishReason",
76
- "description": "The reason why the generation was stopped."
77
  },
78
  "generated_tokens": {
79
  "type": "integer",
80
- "description": "The number of generated tokens"
 
 
81
  },
82
  "prefill": {
83
  "type": "array",
84
  "items": {
85
- "$ref": "#/$defs/PrefillToken"
86
  }
87
  },
88
  "seed": {
89
  "type": "integer",
90
- "description": "The random seed used for generation"
 
 
 
91
  },
92
  "tokens": {
93
  "type": "array",
94
- "description": "The generated tokens and associated details",
95
  "items": {
96
- "$ref": "#/$defs/Token"
97
  }
98
  },
99
  "top_tokens": {
100
  "type": "array",
101
- "description": "Most likely tokens",
102
  "items": {
103
  "type": "array",
104
  "items": {
105
- "$ref": "#/$defs/Token"
106
  }
107
  }
108
- },
109
- "best_of_sequences": {
110
- "type": "array",
111
- "description": "Details about additional sequences when best_of is provided",
112
- "items": {
113
- "$ref": "#/$defs/SequenceDetails"
114
- }
115
  }
116
  },
117
- "required": ["finish_reason", "generated_tokens", "prefill", "tokens"]
118
  },
119
- "SequenceDetails": {
120
  "type": "object",
121
- "title": "TextGenerationOutputSequenceDetails",
122
  "properties": {
 
 
 
123
  "generated_text": {
124
  "type": "string",
125
- "description": "The generated text"
126
- },
127
- "finish_reason": {
128
- "$ref": "#/$defs/FinishReason"
129
  },
130
  "generated_tokens": {
131
  "type": "integer",
132
- "description": "The number of generated tokens"
 
 
133
  },
134
  "prefill": {
135
  "type": "array",
136
  "items": {
137
- "$ref": "#/$defs/PrefillToken"
138
  }
139
  },
140
  "seed": {
141
  "type": "integer",
142
- "description": "The random seed used for generation"
 
 
 
143
  },
144
  "tokens": {
145
  "type": "array",
146
- "description": "The generated tokens and associated details",
147
  "items": {
148
- "$ref": "#/$defs/Token"
149
  }
150
  },
151
  "top_tokens": {
152
  "type": "array",
153
- "description": "Most likely tokens",
154
  "items": {
155
  "type": "array",
156
  "items": {
157
- "$ref": "#/$defs/Token"
158
  }
159
  }
160
  }
161
  },
162
- "required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
  }
165
  }
 
1
  {
2
  "$id": "/inference/schemas/text-generation/output.json",
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Text Generation Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
  "title": "TextGenerationOutput",
6
  "type": "object",
7
+ "required": ["generated_text"],
8
  "properties": {
9
+ "details": {
10
+ "allOf": [
11
+ {
12
+ "$ref": "#/$defs/TextGenerationOutputDetails"
13
+ }
14
+ ],
15
+ "nullable": true
16
+ },
17
  "generated_text": {
18
  "type": "string",
19
+ "example": "test"
 
 
 
 
20
  }
21
  },
 
22
  "$defs": {
23
+ "TextGenerationOutputDetails": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "type": "object",
25
+ "required": ["finish_reason", "generated_tokens", "prefill", "tokens"],
26
  "properties": {
27
+ "best_of_sequences": {
28
+ "type": "array",
29
+ "items": {
30
+ "$ref": "#/$defs/TextGenerationOutputBestOfSequence"
31
+ },
32
+ "nullable": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
 
 
 
 
 
 
 
 
 
 
 
34
  "finish_reason": {
35
+ "$ref": "#/$defs/TextGenerationOutputFinishReason"
 
36
  },
37
  "generated_tokens": {
38
  "type": "integer",
39
+ "format": "int32",
40
+ "example": 1,
41
+ "minimum": 0
42
  },
43
  "prefill": {
44
  "type": "array",
45
  "items": {
46
+ "$ref": "#/$defs/TextGenerationOutputPrefillToken"
47
  }
48
  },
49
  "seed": {
50
  "type": "integer",
51
+ "format": "int64",
52
+ "example": 42,
53
+ "nullable": true,
54
+ "minimum": 0
55
  },
56
  "tokens": {
57
  "type": "array",
 
58
  "items": {
59
+ "$ref": "#/$defs/TextGenerationOutputToken"
60
  }
61
  },
62
  "top_tokens": {
63
  "type": "array",
 
64
  "items": {
65
  "type": "array",
66
  "items": {
67
+ "$ref": "#/$defs/TextGenerationOutputToken"
68
  }
69
  }
 
 
 
 
 
 
 
70
  }
71
  },
72
+ "title": "TextGenerationOutputDetails"
73
  },
74
+ "TextGenerationOutputBestOfSequence": {
75
  "type": "object",
76
+ "required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"],
77
  "properties": {
78
+ "finish_reason": {
79
+ "$ref": "#/$defs/TextGenerationOutputFinishReason"
80
+ },
81
  "generated_text": {
82
  "type": "string",
83
+ "example": "test"
 
 
 
84
  },
85
  "generated_tokens": {
86
  "type": "integer",
87
+ "format": "int32",
88
+ "example": 1,
89
+ "minimum": 0
90
  },
91
  "prefill": {
92
  "type": "array",
93
  "items": {
94
+ "$ref": "#/$defs/TextGenerationOutputPrefillToken"
95
  }
96
  },
97
  "seed": {
98
  "type": "integer",
99
+ "format": "int64",
100
+ "example": 42,
101
+ "nullable": true,
102
+ "minimum": 0
103
  },
104
  "tokens": {
105
  "type": "array",
 
106
  "items": {
107
+ "$ref": "#/$defs/TextGenerationOutputToken"
108
  }
109
  },
110
  "top_tokens": {
111
  "type": "array",
 
112
  "items": {
113
  "type": "array",
114
  "items": {
115
+ "$ref": "#/$defs/TextGenerationOutputToken"
116
  }
117
  }
118
  }
119
  },
120
+ "title": "TextGenerationOutputBestOfSequence"
121
+ },
122
+ "TextGenerationOutputFinishReason": {
123
+ "type": "string",
124
+ "enum": ["length", "eos_token", "stop_sequence"],
125
+ "example": "Length",
126
+ "title": "TextGenerationOutputFinishReason"
127
+ },
128
+ "TextGenerationOutputPrefillToken": {
129
+ "type": "object",
130
+ "required": ["id", "text", "logprob"],
131
+ "properties": {
132
+ "id": {
133
+ "type": "integer",
134
+ "format": "int32",
135
+ "example": 0,
136
+ "minimum": 0
137
+ },
138
+ "logprob": {
139
+ "type": "number",
140
+ "format": "float",
141
+ "example": -0.34,
142
+ "nullable": true
143
+ },
144
+ "text": {
145
+ "type": "string",
146
+ "example": "test"
147
+ }
148
+ },
149
+ "title": "TextGenerationOutputPrefillToken"
150
+ },
151
+ "TextGenerationOutputToken": {
152
+ "type": "object",
153
+ "required": ["id", "text", "logprob", "special"],
154
+ "properties": {
155
+ "id": {
156
+ "type": "integer",
157
+ "format": "int32",
158
+ "example": 0,
159
+ "minimum": 0
160
+ },
161
+ "logprob": {
162
+ "type": "number",
163
+ "format": "float",
164
+ "example": -0.34,
165
+ "nullable": true
166
+ },
167
+ "special": {
168
+ "type": "boolean",
169
+ "example": "false"
170
+ },
171
+ "text": {
172
+ "type": "string",
173
+ "example": "test"
174
+ }
175
+ },
176
+ "title": "TextGenerationOutputToken"
177
  }
178
  }
179
  }
packages/tasks/src/tasks/text-generation/spec/output_stream.json DELETED
@@ -1,47 +0,0 @@
1
- {
2
- "$id": "/inference/schemas/text-generation/output.json",
3
- "$schema": "http://json-schema.org/draft-06/schema#",
4
- "description": "Text Generation Stream Output",
5
- "title": "TextGenerationStreamOutput",
6
- "type": "object",
7
- "properties": {
8
- "token": {
9
- "$ref": "#/$defs/Token",
10
- "description": "Generated token."
11
- },
12
- "index": {
13
- "type": "integer",
14
- "description": "The token index within the stream. Optional to support older clients that omit it."
15
- },
16
- "generated_text": {
17
- "type": "string",
18
- "description": "The complete generated text. Only available when the generation is finished."
19
- },
20
- "details": {
21
- "$ref": "#/$defs/StreamDetails",
22
- "description": "Generation details. Only available when the generation is finished."
23
- }
24
- },
25
- "required": ["token"],
26
- "$defs": {
27
- "StreamDetails": {
28
- "type": "object",
29
- "title": "TextGenerationStreamDetails",
30
- "properties": {
31
- "finish_reason": {
32
- "$ref": "#/$defs/FinishReason",
33
- "description": "The reason why the generation was stopped."
34
- },
35
- "generated_tokens": {
36
- "type": "integer",
37
- "description": "The number of generated tokens"
38
- },
39
- "seed": {
40
- "type": "integer",
41
- "description": "The random seed used for generation"
42
- }
43
- },
44
- "required": ["finish_reason", "generated_tokens", "seed"]
45
- }
46
- }
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
packages/tasks/src/tasks/text-generation/spec/stream_output.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$id": "/inference/schemas/text-generation/stream_output.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Text Generation Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
+ "title": "TextGenerationStreamOutput",
6
+ "type": "object",
7
+ "required": ["index", "token"],
8
+ "properties": {
9
+ "details": {
10
+ "allOf": [
11
+ {
12
+ "$ref": "#/$defs/TextGenerationStreamOutputStreamDetails"
13
+ }
14
+ ],
15
+ "default": "null",
16
+ "nullable": true
17
+ },
18
+ "generated_text": {
19
+ "type": "string",
20
+ "default": "null",
21
+ "example": "test",
22
+ "nullable": true
23
+ },
24
+ "index": {
25
+ "type": "integer",
26
+ "format": "int32",
27
+ "minimum": 0
28
+ },
29
+ "token": {
30
+ "$ref": "#/$defs/TextGenerationStreamOutputToken"
31
+ },
32
+ "top_tokens": {
33
+ "type": "array",
34
+ "items": {
35
+ "$ref": "#/$defs/TextGenerationStreamOutputToken"
36
+ }
37
+ }
38
+ },
39
+ "$defs": {
40
+ "TextGenerationStreamOutputStreamDetails": {
41
+ "type": "object",
42
+ "required": ["finish_reason", "generated_tokens"],
43
+ "properties": {
44
+ "finish_reason": {
45
+ "$ref": "#/$defs/TextGenerationStreamOutputFinishReason"
46
+ },
47
+ "generated_tokens": {
48
+ "type": "integer",
49
+ "format": "int32",
50
+ "example": 1,
51
+ "minimum": 0
52
+ },
53
+ "seed": {
54
+ "type": "integer",
55
+ "format": "int64",
56
+ "example": 42,
57
+ "nullable": true,
58
+ "minimum": 0
59
+ }
60
+ },
61
+ "title": "TextGenerationStreamOutputStreamDetails"
62
+ },
63
+ "TextGenerationStreamOutputFinishReason": {
64
+ "type": "string",
65
+ "enum": ["length", "eos_token", "stop_sequence"],
66
+ "example": "Length",
67
+ "title": "TextGenerationStreamOutputFinishReason"
68
+ },
69
+ "TextGenerationStreamOutputToken": {
70
+ "type": "object",
71
+ "required": ["id", "text", "logprob", "special"],
72
+ "properties": {
73
+ "id": {
74
+ "type": "integer",
75
+ "format": "int32",
76
+ "example": 0,
77
+ "minimum": 0
78
+ },
79
+ "logprob": {
80
+ "type": "number",
81
+ "format": "float",
82
+ "example": -0.34,
83
+ "nullable": true
84
+ },
85
+ "special": {
86
+ "type": "boolean",
87
+ "example": "false"
88
+ },
89
+ "text": {
90
+ "type": "string",
91
+ "example": "test"
92
+ }
93
+ },
94
+ "title": "TextGenerationStreamOutputToken"
95
+ }
96
+ }
97
+ }