Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
machineuser
committed on
Commit
·
ac33c34
1
Parent(s):
507971e
Sync widgets demo
Browse files- packages/jinja/test/e2e.test.js +2 -2
- packages/tasks/package.json +7 -2
- packages/tasks/pnpm-lock.yaml +52 -0
- packages/tasks/scripts/inference-codegen.ts +1 -1
- packages/tasks/scripts/inference-tgi-import.ts +115 -0
- packages/tasks/src/tasks/chat-completion/inference.ts +204 -85
- packages/tasks/src/tasks/chat-completion/spec/input.json +198 -34
- packages/tasks/src/tasks/chat-completion/spec/output.json +178 -40
- packages/tasks/src/tasks/chat-completion/spec/output_stream.json +0 -48
- packages/tasks/src/tasks/chat-completion/spec/stream_output.json +170 -0
- packages/tasks/src/tasks/index.ts +7 -8
- packages/tasks/src/tasks/text-generation/inference.ts +58 -170
- packages/tasks/src/tasks/text-generation/spec/input.json +130 -29
- packages/tasks/src/tasks/text-generation/spec/output.json +104 -90
- packages/tasks/src/tasks/text-generation/spec/output_stream.json +0 -47
- packages/tasks/src/tasks/text-generation/spec/stream_output.json +97 -0
packages/jinja/test/e2e.test.js
CHANGED
@@ -192,7 +192,7 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({
|
|
192 |
},
|
193 |
target: `<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
|
194 |
},
|
195 |
-
"
|
196 |
chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
|
197 |
data: {
|
198 |
messages: EXAMPLE_CHAT,
|
@@ -440,7 +440,7 @@ describe("End-to-end tests", () => {
|
|
440 |
});
|
441 |
|
442 |
it("should parse a chat template from the Hugging Face Hub", async () => {
|
443 |
-
const repo = "
|
444 |
const tokenizerConfig = await (
|
445 |
await downloadFile({
|
446 |
repo,
|
|
|
192 |
},
|
193 |
target: `<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
|
194 |
},
|
195 |
+
"TheBloke/Mistral-7B-Instruct-v0.1-GPTQ": {
|
196 |
chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
|
197 |
data: {
|
198 |
messages: EXAMPLE_CHAT,
|
|
|
440 |
});
|
441 |
|
442 |
it("should parse a chat template from the Hugging Face Hub", async () => {
|
443 |
+
const repo = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ";
|
444 |
const tokenizerConfig = await (
|
445 |
await downloadFile({
|
446 |
repo,
|
packages/tasks/package.json
CHANGED
@@ -27,7 +27,8 @@
|
|
27 |
"build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen",
|
28 |
"prepare": "pnpm run build",
|
29 |
"check": "tsc",
|
30 |
-
"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts"
|
|
|
31 |
},
|
32 |
"type": "module",
|
33 |
"files": [
|
@@ -44,6 +45,10 @@
|
|
44 |
"license": "MIT",
|
45 |
"devDependencies": {
|
46 |
"@types/node": "^20.11.5",
|
47 |
-
"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz"
|
|
|
|
|
|
|
|
|
48 |
}
|
49 |
}
|
|
|
27 |
"build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen",
|
28 |
"prepare": "pnpm run build",
|
29 |
"check": "tsc",
|
30 |
+
"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
|
31 |
+
"inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json"
|
32 |
},
|
33 |
"type": "module",
|
34 |
"files": [
|
|
|
45 |
"license": "MIT",
|
46 |
"devDependencies": {
|
47 |
"@types/node": "^20.11.5",
|
48 |
+
"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz",
|
49 |
+
"type-fest": "^3.13.1"
|
50 |
+
},
|
51 |
+
"dependencies": {
|
52 |
+
"node-fetch": "^3.3.2"
|
53 |
}
|
54 |
}
|
packages/tasks/pnpm-lock.yaml
CHANGED
@@ -4,6 +4,11 @@ settings:
|
|
4 |
autoInstallPeers: true
|
5 |
excludeLinksFromLockfile: false
|
6 |
|
|
|
|
|
|
|
|
|
|
|
7 |
devDependencies:
|
8 |
'@types/node':
|
9 |
specifier: ^20.11.5
|
@@ -11,6 +16,9 @@ devDependencies:
|
|
11 |
quicktype-core:
|
12 |
specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz
|
13 |
version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz'
|
|
|
|
|
|
|
14 |
|
15 |
packages:
|
16 |
|
@@ -62,6 +70,11 @@ packages:
|
|
62 |
- encoding
|
63 |
dev: true
|
64 |
|
|
|
|
|
|
|
|
|
|
|
65 | |
66 |
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
|
67 |
engines: {node: '>=6'}
|
@@ -72,6 +85,21 @@ packages:
|
|
72 |
engines: {node: '>=0.8.x'}
|
73 |
dev: true
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 | |
76 |
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
|
77 |
dev: true
|
@@ -88,6 +116,11 @@ packages:
|
|
88 |
resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
|
89 |
dev: true
|
90 |
|
|
|
|
|
|
|
|
|
|
|
91 | |
92 |
resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
|
93 |
engines: {node: 4.x || >=6.0.0}
|
@@ -100,6 +133,15 @@ packages:
|
|
100 |
whatwg-url: 5.0.0
|
101 |
dev: true
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 | |
104 |
resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
|
105 |
dev: true
|
@@ -147,6 +189,11 @@ packages:
|
|
147 |
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
148 |
dev: true
|
149 |
|
|
|
|
|
|
|
|
|
|
|
150 | |
151 |
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
|
152 |
dev: true
|
@@ -169,6 +216,11 @@ packages:
|
|
169 |
resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
|
170 |
dev: true
|
171 |
|
|
|
|
|
|
|
|
|
|
|
172 | |
173 |
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
|
174 |
dev: true
|
|
|
4 |
autoInstallPeers: true
|
5 |
excludeLinksFromLockfile: false
|
6 |
|
7 |
+
dependencies:
|
8 |
+
node-fetch:
|
9 |
+
specifier: ^3.3.2
|
10 |
+
version: 3.3.2
|
11 |
+
|
12 |
devDependencies:
|
13 |
'@types/node':
|
14 |
specifier: ^20.11.5
|
|
|
16 |
quicktype-core:
|
17 |
specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz
|
18 |
version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz'
|
19 |
+
type-fest:
|
20 |
+
specifier: ^3.13.1
|
21 |
+
version: 3.13.1
|
22 |
|
23 |
packages:
|
24 |
|
|
|
70 |
- encoding
|
71 |
dev: true
|
72 |
|
73 | |
74 |
+
resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==}
|
75 |
+
engines: {node: '>= 12'}
|
76 |
+
dev: false
|
77 |
+
|
78 | |
79 |
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
|
80 |
engines: {node: '>=6'}
|
|
|
85 |
engines: {node: '>=0.8.x'}
|
86 |
dev: true
|
87 |
|
88 | |
89 |
+
resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==}
|
90 |
+
engines: {node: ^12.20 || >= 14.13}
|
91 |
+
dependencies:
|
92 |
+
node-domexception: 1.0.0
|
93 |
+
web-streams-polyfill: 3.3.3
|
94 |
+
dev: false
|
95 |
+
|
96 | |
97 |
+
resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==}
|
98 |
+
engines: {node: '>=12.20.0'}
|
99 |
+
dependencies:
|
100 |
+
fetch-blob: 3.2.0
|
101 |
+
dev: false
|
102 |
+
|
103 | |
104 |
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
|
105 |
dev: true
|
|
|
116 |
resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
|
117 |
dev: true
|
118 |
|
119 | |
120 |
+
resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
|
121 |
+
engines: {node: '>=10.5.0'}
|
122 |
+
dev: false
|
123 |
+
|
124 | |
125 |
resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
|
126 |
engines: {node: 4.x || >=6.0.0}
|
|
|
133 |
whatwg-url: 5.0.0
|
134 |
dev: true
|
135 |
|
136 | |
137 |
+
resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==}
|
138 |
+
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
|
139 |
+
dependencies:
|
140 |
+
data-uri-to-buffer: 4.0.1
|
141 |
+
fetch-blob: 3.2.0
|
142 |
+
formdata-polyfill: 4.0.10
|
143 |
+
dev: false
|
144 |
+
|
145 | |
146 |
resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
|
147 |
dev: true
|
|
|
189 |
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
190 |
dev: true
|
191 |
|
192 | |
193 |
+
resolution: {integrity: sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==}
|
194 |
+
engines: {node: '>=14.16'}
|
195 |
+
dev: true
|
196 |
+
|
197 | |
198 |
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
|
199 |
dev: true
|
|
|
216 |
resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
|
217 |
dev: true
|
218 |
|
219 | |
220 |
+
resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==}
|
221 |
+
engines: {node: '>= 8'}
|
222 |
+
dev: false
|
223 |
+
|
224 | |
225 |
resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
|
226 |
dev: true
|
packages/tasks/scripts/inference-codegen.ts
CHANGED
@@ -57,7 +57,7 @@ async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles:
|
|
57 |
if (taskId === "text-generation" || taskId === "chat-completion") {
|
58 |
await schema.addSource({
|
59 |
name: `${taskId}-stream-output`,
|
60 |
-
schema: await fs.readFile(`${taskSpecDir}/
|
61 |
});
|
62 |
}
|
63 |
const inputData = new InputData();
|
|
|
57 |
if (taskId === "text-generation" || taskId === "chat-completion") {
|
58 |
await schema.addSource({
|
59 |
name: `${taskId}-stream-output`,
|
60 |
+
schema: await fs.readFile(`${taskSpecDir}/stream_output.json`, { encoding: "utf-8" }),
|
61 |
});
|
62 |
}
|
63 |
const inputData = new InputData();
|
packages/tasks/scripts/inference-tgi-import.ts
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Fetches TGI specs and generates JSON schemas for input, output and stream_output of
|
3 |
+
* text-generation and chat-completion tasks.
|
4 |
+
* See https://huggingface.github.io/text-generation-inference/
|
5 |
+
*/
|
6 |
+
import fs from "fs/promises";
|
7 |
+
import fetch from "node-fetch";
|
8 |
+
import * as path from "node:path/posix";
|
9 |
+
import { existsSync as pathExists } from "node:fs";
|
10 |
+
import type { JsonObject, JsonValue } from "type-fest";
|
11 |
+
|
12 |
+
const URL = "https://huggingface.github.io/text-generation-inference/openapi.json";
|
13 |
+
|
14 |
+
/**
 * Locates the root directory of the package that contains this script.
 *
 * Walks upwards from this module's location and returns the first directory that holds a
 * `package.json`. Falls back to "/" when the filesystem root is reached without a match.
 *
 * `import.meta.url` is a `file://` URL rather than a filesystem path. Normalizing it directly
 * yields a relative "file:/..." string, so the upward walk can never reach "/" and only stops
 * if the current working directory happens to contain a `package.json`. The URL is therefore
 * converted to a real path before walking.
 *
 * @returns Absolute POSIX path of the package root, or "/" if no `package.json` is found.
 */
const rootDirFinder = function (): string {
	// `URL` is shadowed by the module-level spec URL constant, hence `globalThis.URL`.
	const modulePath = decodeURIComponent(new globalThis.URL(import.meta.url).pathname);
	let currentPath = path.normalize(modulePath);

	while (currentPath !== "/") {
		if (pathExists(path.join(currentPath, "package.json"))) {
			return currentPath;
		}

		// Step one level up; normalizing keeps the path free of ".." segments.
		currentPath = path.normalize(path.join(currentPath, ".."));
	}

	return "/";
};
|
27 |
+
|
28 |
+
const rootDir = rootDirFinder();
|
29 |
+
const tasksDir = path.join(rootDir, "src", "tasks");
|
30 |
+
|
31 |
+
/**
 * Capitalizes every `-` or `_` separated part of `str` and glues the parts back together.
 *
 * @param str Identifier to convert, e.g. "text-generation" or "stream_output".
 * @param joiner String inserted between the capitalized parts (nothing by default).
 * @returns The converted string, e.g. "TextGeneration", or "Stream Output" with joiner " ".
 */
function toCamelCase(str: string, joiner = "") {
	const capitalizedParts: string[] = [];
	for (const word of str.split(/[-_]/)) {
		const head = word.charAt(0).toUpperCase();
		const tail = word.slice(1);
		capitalizedParts.push(head + tail);
	}
	return capitalizedParts.join(joiner);
}
|
37 |
+
|
38 |
+
/**
 * Builds the JSON schema for one spec file of a task from the TGI OpenAPI specs and writes it
 * to `<tasksDir>/<task>/spec/<type>.json`.
 *
 * The main component and every component it references (transitively, through `$ref`) are
 * collected under `$defs`. Referenced components are renamed with a `<Task><Type>` prefix so
 * that titles do not conflict, and each `$ref` is rewritten to point at `#/$defs/<newName>`.
 *
 * @param task Task id, e.g. "text-generation".
 * @param mainComponentName Name of the OpenAPI component schema to export, e.g. "GenerateResponse".
 * @param type Which spec file to generate.
 * @throws Error if the specs cannot be fetched, if `mainComponentName` is not among the
 * component schemas, or if a `$ref` points to an unknown component.
 */
async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") {
	console.debug(`✨ Importing`, task, type);

	console.debug(" 📥 Fetching TGI specs");
	const response = await fetch(URL);
	if (!response.ok) {
		// Fail early instead of trying to parse an error page as the OpenAPI document.
		throw new Error(`Failed to fetch TGI specs from ${URL}: ${response.status} ${response.statusText}`);
	}
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const openapi = (await response.json()) as any;
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const components: Record<string, any> = openapi["components"]["schemas"];

	// e.g. TextGeneration
	const camelName = toCamelCase(task);
	// e.g. TextGenerationInput
	const camelFullName = camelName + toCamelCase(type);
	const mainComponent = components[mainComponentName];
	if (!mainComponent) {
		// Without this guard the failure would surface later as an opaque TypeError.
		throw new Error(`Main component not found in components: ${mainComponentName}`);
	}
	const filteredComponents: Record<string, JsonObject> = {};

	/** Recursively walks `data`, registering and rewriting every `$ref` it finds (mutates `data`). */
	function _scan(data: JsonValue) {
		// NOTE(review): the `instanceof` check looks redundant at runtime; presumably kept for
		// type narrowing — confirm before removing.
		if (Array.isArray(data) || data instanceof Array) {
			for (const item of data) {
				_scan(item);
			}
		} else if (data && typeof data === "object") {
			for (const key of Object.keys(data)) {
				if (key === "$ref" && typeof data[key] === "string") {
					// Verify reference exists
					const ref = (data[key] as string).split("/").pop() ?? "";
					if (!components[ref]) {
						throw new Error(`Reference not found in components: ${data[key]}`);
					}

					// Add reference to components to export (and scan it too)
					const newRef = camelFullName + ref.replace(camelName, "");
					if (!filteredComponents[newRef]) {
						components[ref]["title"] = newRef; // Rename title to avoid conflicts
						// Registered before scanning so that circular references terminate.
						filteredComponents[newRef] = components[ref];
						_scan(components[ref]);
					}

					// Updating the reference to new format
					data[key] = `#/$defs/${newRef}`;
				} else {
					_scan(data[key]);
				}
			}
		}
	}

	console.debug(" 📦 Packaging jsonschema");
	_scan(mainComponent);

	const prettyName = toCamelCase(task, " ") + " " + toCamelCase(type, " ");
	const inputSchema = {
		$id: `/inference/schemas/${task}/${type}.json`,
		$schema: "http://json-schema.org/draft-06/schema#",
		description:
			prettyName +
			".\n\nAuto-generated from TGI specs." +
			"\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
		title: camelFullName,
		type: "object",
		required: mainComponent["required"],
		properties: mainComponent["properties"],
		$defs: filteredComponents,
	};

	const specPath = path.join(tasksDir, task, "spec", `${type}.json`);
	console.debug(" 📂 Exporting", specPath);
	await fs.writeFile(specPath, JSON.stringify(inputSchema, null, 4));
}
|
108 |
+
|
109 |
+
// Every spec file to regenerate: [task, TGI component to export, spec type].
// Processed sequentially, in this order.
const importTargets: Array<[string, string, "input" | "output" | "stream_output"]> = [
	["text-generation", "CompatGenerateRequest", "input"],
	["text-generation", "GenerateResponse", "output"],
	["text-generation", "StreamResponse", "stream_output"],
	["chat-completion", "ChatRequest", "input"],
	["chat-completion", "ChatCompletion", "output"],
	["chat-completion", "ChatCompletionChunk", "stream_output"],
];

for (const [task, mainComponentName, type] of importTargets) {
	await _extractAndAdapt(task, mainComponentName, type);
}
console.debug("✅ All done!");
|
packages/tasks/src/tasks/chat-completion/inference.ts
CHANGED
@@ -5,154 +5,273 @@
|
|
5 |
*/
|
6 |
|
7 |
/**
|
8 |
-
*
|
|
|
|
|
|
|
|
|
9 |
*/
|
10 |
export interface ChatCompletionInput {
|
11 |
/**
|
12 |
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
|
13 |
-
* frequency in the text so far,
|
14 |
-
* verbatim.
|
15 |
*/
|
16 |
frequency_penalty?: number;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
/**
|
18 |
* The maximum number of tokens that can be generated in the chat completion.
|
19 |
*/
|
20 |
max_tokens?: number;
|
|
|
|
|
|
|
21 |
messages: ChatCompletionInputMessage[];
|
22 |
/**
|
23 |
-
*
|
|
|
24 |
*/
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
/**
|
27 |
-
*
|
|
|
|
|
28 |
*/
|
29 |
-
|
|
|
30 |
/**
|
31 |
-
*
|
32 |
*/
|
|
|
33 |
stream?: boolean;
|
34 |
/**
|
35 |
-
*
|
|
|
|
|
|
|
|
|
36 |
*/
|
37 |
temperature?: number;
|
|
|
|
|
|
|
|
|
|
|
38 |
/**
|
39 |
-
*
|
40 |
-
*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
*/
|
42 |
top_p?: number;
|
43 |
[property: string]: unknown;
|
44 |
}
|
45 |
|
46 |
export interface ChatCompletionInputMessage {
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
role: ChatCompletionMessageRole;
|
52 |
[property: string]: unknown;
|
53 |
}
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
/**
|
66 |
-
*
|
|
|
|
|
|
|
|
|
67 |
*/
|
68 |
export interface ChatCompletionOutput {
|
69 |
-
|
70 |
-
* A list of chat completion choices.
|
71 |
-
*/
|
72 |
-
choices: ChatCompletionOutputChoice[];
|
73 |
-
/**
|
74 |
-
* The Unix timestamp (in seconds) of when the chat completion was created.
|
75 |
-
*/
|
76 |
created: number;
|
|
|
|
|
|
|
|
|
|
|
77 |
[property: string]: unknown;
|
78 |
}
|
79 |
|
80 |
-
export interface
|
81 |
-
|
82 |
-
* The reason why the generation was stopped.
|
83 |
-
*/
|
84 |
-
finish_reason: ChatCompletionFinishReason;
|
85 |
-
/**
|
86 |
-
* The index of the choice in the list of choices.
|
87 |
-
*/
|
88 |
index: number;
|
89 |
-
|
|
|
90 |
[property: string]: unknown;
|
91 |
}
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
*
|
98 |
-
* The model generated an end-of-sentence (EOS) token
|
99 |
-
*
|
100 |
-
* One of the sequence in stop_sequences was generated
|
101 |
-
*/
|
102 |
-
export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
|
103 |
|
104 |
-
export interface
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
[property: string]: unknown;
|
111 |
}
|
112 |
|
113 |
/**
|
114 |
-
* Chat Completion Stream Output
|
|
|
|
|
|
|
|
|
115 |
*/
|
116 |
export interface ChatCompletionStreamOutput {
|
117 |
-
/**
|
118 |
-
* A list of chat completion choices.
|
119 |
-
*/
|
120 |
choices: ChatCompletionStreamOutputChoice[];
|
121 |
-
/**
|
122 |
-
* The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
|
123 |
-
* the same timestamp.
|
124 |
-
*/
|
125 |
created: number;
|
|
|
|
|
|
|
|
|
126 |
[property: string]: unknown;
|
127 |
}
|
128 |
|
129 |
export interface ChatCompletionStreamOutputChoice {
|
130 |
-
/**
|
131 |
-
* A chat completion delta generated by streamed model responses.
|
132 |
-
*/
|
133 |
delta: ChatCompletionStreamOutputDelta;
|
134 |
-
|
135 |
-
* The reason why the generation was stopped.
|
136 |
-
*/
|
137 |
-
finish_reason?: ChatCompletionFinishReason;
|
138 |
-
/**
|
139 |
-
* The index of the choice in the list of choices.
|
140 |
-
*/
|
141 |
index: number;
|
|
|
142 |
[property: string]: unknown;
|
143 |
}
|
144 |
|
145 |
-
/**
|
146 |
-
* A chat completion delta generated by streamed model responses.
|
147 |
-
*/
|
148 |
export interface ChatCompletionStreamOutputDelta {
|
149 |
-
/**
|
150 |
-
* The contents of the chunk message.
|
151 |
-
*/
|
152 |
content?: string;
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
[property: string]: unknown;
|
158 |
}
|
|
|
5 |
*/
|
6 |
|
7 |
/**
|
8 |
+
* Chat Completion Input.
|
9 |
+
*
|
10 |
+
* Auto-generated from TGI specs.
|
11 |
+
* For more details, check out
|
12 |
+
* https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
|
13 |
*/
|
14 |
export interface ChatCompletionInput {
|
15 |
/**
|
16 |
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
|
17 |
+
* frequency in the text so far,
|
18 |
+
* decreasing the model's likelihood to repeat the same line verbatim.
|
19 |
*/
|
20 |
frequency_penalty?: number;
|
21 |
+
/**
|
22 |
+
* UNUSED
|
23 |
+
* Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
|
24 |
+
* object that maps tokens
|
25 |
+
* (specified by their token ID in the tokenizer) to an associated bias value from -100 to
|
26 |
+
* 100. Mathematically,
|
27 |
+
* the bias is added to the logits generated by the model prior to sampling. The exact
|
28 |
+
* effect will vary per model,
|
29 |
+
* but values between -1 and 1 should decrease or increase likelihood of selection; values
|
30 |
+
* like -100 or 100 should
|
31 |
+
* result in a ban or exclusive selection of the relevant token.
|
32 |
+
*/
|
33 |
+
logit_bias?: number[];
|
34 |
+
/**
|
35 |
+
* Whether to return log probabilities of the output tokens or not. If true, returns the log
|
36 |
+
* probabilities of each
|
37 |
+
* output token returned in the content of message.
|
38 |
+
*/
|
39 |
+
logprobs?: boolean;
|
40 |
/**
|
41 |
* The maximum number of tokens that can be generated in the chat completion.
|
42 |
*/
|
43 |
max_tokens?: number;
|
44 |
+
/**
|
45 |
+
* A list of messages comprising the conversation so far.
|
46 |
+
*/
|
47 |
messages: ChatCompletionInputMessage[];
|
48 |
/**
|
49 |
+
* [UNUSED] ID of the model to use. See the model endpoint compatibility table for details
|
50 |
+
* on which models work with the Chat API.
|
51 |
*/
|
52 |
+
model: string;
|
53 |
+
/**
|
54 |
+
* UNUSED
|
55 |
+
* How many chat completion choices to generate for each input message. Note that you will
|
56 |
+
* be charged based on the
|
57 |
+
* number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
|
58 |
+
*/
|
59 |
+
n?: number;
|
60 |
/**
|
61 |
+
* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
|
62 |
+
* appear in the text so far,
|
63 |
+
* increasing the model's likelihood to talk about new topics
|
64 |
*/
|
65 |
+
presence_penalty?: number;
|
66 |
+
seed?: number;
|
67 |
/**
|
68 |
+
* Up to 4 sequences where the API will stop generating further tokens.
|
69 |
*/
|
70 |
+
stop?: string[];
|
71 |
stream?: boolean;
|
72 |
/**
|
73 |
+
* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
|
74 |
+
* output more random, while
|
75 |
+
* lower values like 0.2 will make it more focused and deterministic.
|
76 |
+
*
|
77 |
+
* We generally recommend altering this or `top_p` but not both.
|
78 |
*/
|
79 |
temperature?: number;
|
80 |
+
tool_choice?: ChatCompletionInputToolType;
|
81 |
+
/**
|
82 |
+
* A prompt to be appended before the tools
|
83 |
+
*/
|
84 |
+
tool_prompt?: string;
|
85 |
/**
|
86 |
+
* A list of tools the model may call. Currently, only functions are supported as a tool.
|
87 |
+
* Use this to provide a list of
|
88 |
+
* functions the model may generate JSON inputs for.
|
89 |
+
*/
|
90 |
+
tools?: ChatCompletionInputTool[];
|
91 |
+
/**
|
92 |
+
* An integer between 0 and 5 specifying the number of most likely tokens to return at each
|
93 |
+
* token position, each with
|
94 |
+
* an associated log probability. logprobs must be set to true if this parameter is used.
|
95 |
+
*/
|
96 |
+
top_logprobs?: number;
|
97 |
+
/**
|
98 |
+
* An alternative to sampling with temperature, called nucleus sampling, where the model
|
99 |
+
* considers the results of the
|
100 |
+
* tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%
|
101 |
+
* probability mass are considered.
|
102 |
*/
|
103 |
top_p?: number;
|
104 |
[property: string]: unknown;
|
105 |
}
|
106 |
|
107 |
export interface ChatCompletionInputMessage {
|
108 |
+
content?: string;
|
109 |
+
name?: string;
|
110 |
+
role: string;
|
111 |
+
tool_calls?: ChatCompletionInputToolCall[];
|
|
|
112 |
[property: string]: unknown;
|
113 |
}
|
114 |
|
115 |
+
export interface ChatCompletionInputToolCall {
|
116 |
+
function: ChatCompletionInputFunctionDefinition;
|
117 |
+
id: number;
|
118 |
+
type: string;
|
119 |
+
[property: string]: unknown;
|
120 |
+
}
|
121 |
|
122 |
+
export interface ChatCompletionInputFunctionDefinition {
|
123 |
+
arguments: unknown;
|
124 |
+
description?: string;
|
125 |
+
name: string;
|
126 |
+
[property: string]: unknown;
|
127 |
+
}
|
128 |
+
|
129 |
+
export type ChatCompletionInputToolType = "OneOf" | ChatCompletionInputToolTypeObject;
|
130 |
+
|
131 |
+
export interface ChatCompletionInputToolTypeObject {
|
132 |
+
FunctionName: string;
|
133 |
+
[property: string]: unknown;
|
134 |
+
}
|
135 |
+
|
136 |
+
export interface ChatCompletionInputTool {
|
137 |
+
function: ChatCompletionInputFunctionDefinition;
|
138 |
+
type: string;
|
139 |
+
[property: string]: unknown;
|
140 |
+
}
|
141 |
|
142 |
/**
|
143 |
+
* Chat Completion Output.
|
144 |
+
*
|
145 |
+
* Auto-generated from TGI specs.
|
146 |
+
* For more details, check out
|
147 |
+
* https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
|
148 |
*/
|
149 |
export interface ChatCompletionOutput {
|
150 |
+
choices: ChatCompletionOutputComplete[];
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
created: number;
|
152 |
+
id: string;
|
153 |
+
model: string;
|
154 |
+
object: string;
|
155 |
+
system_fingerprint: string;
|
156 |
+
usage: ChatCompletionOutputUsage;
|
157 |
[property: string]: unknown;
|
158 |
}
|
159 |
|
160 |
+
export interface ChatCompletionOutputComplete {
|
161 |
+
finish_reason: string;
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
index: number;
|
163 |
+
logprobs?: ChatCompletionOutputLogprobs;
|
164 |
+
message: ChatCompletionOutputMessage;
|
165 |
[property: string]: unknown;
|
166 |
}
|
167 |
|
168 |
+
export interface ChatCompletionOutputLogprobs {
|
169 |
+
content: ChatCompletionOutputLogprob[];
|
170 |
+
[property: string]: unknown;
|
171 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
+
export interface ChatCompletionOutputLogprob {
|
174 |
+
logprob: number;
|
175 |
+
token: string;
|
176 |
+
top_logprobs: ChatCompletionOutputTopLogprob[];
|
177 |
+
[property: string]: unknown;
|
178 |
+
}
|
179 |
+
|
180 |
+
export interface ChatCompletionOutputTopLogprob {
|
181 |
+
logprob: number;
|
182 |
+
token: string;
|
183 |
+
[property: string]: unknown;
|
184 |
+
}
|
185 |
+
|
186 |
+
export interface ChatCompletionOutputMessage {
|
187 |
+
content?: string;
|
188 |
+
name?: string;
|
189 |
+
role: string;
|
190 |
+
tool_calls?: ChatCompletionOutputToolCall[];
|
191 |
+
[property: string]: unknown;
|
192 |
+
}
|
193 |
+
|
194 |
+
export interface ChatCompletionOutputToolCall {
|
195 |
+
function: ChatCompletionOutputFunctionDefinition;
|
196 |
+
id: number;
|
197 |
+
type: string;
|
198 |
+
[property: string]: unknown;
|
199 |
+
}
|
200 |
+
|
201 |
+
export interface ChatCompletionOutputFunctionDefinition {
|
202 |
+
arguments: unknown;
|
203 |
+
description?: string;
|
204 |
+
name: string;
|
205 |
+
[property: string]: unknown;
|
206 |
+
}
|
207 |
+
|
208 |
+
export interface ChatCompletionOutputUsage {
|
209 |
+
completion_tokens: number;
|
210 |
+
prompt_tokens: number;
|
211 |
+
total_tokens: number;
|
212 |
[property: string]: unknown;
|
213 |
}
|
214 |
|
215 |
/**
|
216 |
+
* Chat Completion Stream Output.
|
217 |
+
*
|
218 |
+
* Auto-generated from TGI specs.
|
219 |
+
* For more details, check out
|
220 |
+
* https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
|
221 |
*/
|
222 |
export interface ChatCompletionStreamOutput {
|
|
|
|
|
|
|
223 |
choices: ChatCompletionStreamOutputChoice[];
|
|
|
|
|
|
|
|
|
224 |
created: number;
|
225 |
+
id: string;
|
226 |
+
model: string;
|
227 |
+
object: string;
|
228 |
+
system_fingerprint: string;
|
229 |
[property: string]: unknown;
|
230 |
}
|
231 |
|
232 |
export interface ChatCompletionStreamOutputChoice {
|
|
|
|
|
|
|
233 |
delta: ChatCompletionStreamOutputDelta;
|
234 |
+
finish_reason?: string;
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
index: number;
|
236 |
+
logprobs?: ChatCompletionStreamOutputLogprobs;
|
237 |
[property: string]: unknown;
|
238 |
}
|
239 |
|
|
|
|
|
|
|
240 |
export interface ChatCompletionStreamOutputDelta {
|
|
|
|
|
|
|
241 |
content?: string;
|
242 |
+
role: string;
|
243 |
+
tool_calls?: ChatCompletionStreamOutputDeltaToolCall;
|
244 |
+
[property: string]: unknown;
|
245 |
+
}
|
246 |
+
|
247 |
+
export interface ChatCompletionStreamOutputDeltaToolCall {
|
248 |
+
function: ChatCompletionStreamOutputFunction;
|
249 |
+
id: string;
|
250 |
+
index: number;
|
251 |
+
type: string;
|
252 |
+
[property: string]: unknown;
|
253 |
+
}
|
254 |
+
|
255 |
+
export interface ChatCompletionStreamOutputFunction {
|
256 |
+
arguments: string;
|
257 |
+
name?: string;
|
258 |
+
[property: string]: unknown;
|
259 |
+
}
|
260 |
+
|
261 |
+
export interface ChatCompletionStreamOutputLogprobs {
|
262 |
+
content: ChatCompletionStreamOutputLogprob[];
|
263 |
+
[property: string]: unknown;
|
264 |
+
}
|
265 |
+
|
266 |
+
export interface ChatCompletionStreamOutputLogprob {
|
267 |
+
logprob: number;
|
268 |
+
token: string;
|
269 |
+
top_logprobs: ChatCompletionStreamOutputTopLogprob[];
|
270 |
+
[property: string]: unknown;
|
271 |
+
}
|
272 |
+
|
273 |
+
export interface ChatCompletionStreamOutputTopLogprob {
|
274 |
+
logprob: number;
|
275 |
+
token: string;
|
276 |
[property: string]: unknown;
|
277 |
}
|
packages/tasks/src/tasks/chat-completion/spec/input.json
CHANGED
@@ -1,63 +1,227 @@
|
|
1 |
{
|
2 |
-
"title": "ChatCompletionInput",
|
3 |
"$id": "/inference/schemas/chat-completion/input.json",
|
4 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
5 |
-
"description": "
|
|
|
6 |
"type": "object",
|
|
|
7 |
"properties": {
|
8 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"type": "array",
|
10 |
-
"title": "ChatCompletionInputMessage",
|
11 |
"items": {
|
12 |
-
"type": "
|
13 |
-
"
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
"content": {
|
18 |
-
"type": "string",
|
19 |
-
"description": "The content of the message."
|
20 |
-
}
|
21 |
-
},
|
22 |
-
"required": ["role", "content"]
|
23 |
-
}
|
24 |
},
|
25 |
-
"
|
26 |
-
"type": "
|
27 |
-
"description": "
|
|
|
|
|
28 |
},
|
29 |
"max_tokens": {
|
30 |
"type": "integer",
|
31 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
},
|
33 |
"seed": {
|
34 |
"type": "integer",
|
35 |
-
"
|
|
|
|
|
|
|
36 |
},
|
37 |
"stop": {
|
38 |
-
"
|
39 |
-
"
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
},
|
42 |
"stream": {
|
43 |
-
"type": "boolean"
|
44 |
-
"description": "If set, partial message deltas will be sent."
|
45 |
},
|
46 |
"temperature": {
|
47 |
"type": "number",
|
48 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
},
|
50 |
"top_p": {
|
51 |
"type": "number",
|
52 |
-
"
|
|
|
|
|
|
|
53 |
}
|
54 |
},
|
55 |
-
"
|
56 |
-
|
57 |
-
|
58 |
-
"
|
59 |
-
"
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
}
|
62 |
}
|
63 |
}
|
|
|
1 |
{
|
|
|
2 |
"$id": "/inference/schemas/chat-completion/input.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
+
"description": "Chat Completion Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
5 |
+
"title": "ChatCompletionInput",
|
6 |
"type": "object",
|
7 |
+
"required": ["model", "messages"],
|
8 |
"properties": {
|
9 |
+
"frequency_penalty": {
|
10 |
+
"type": "number",
|
11 |
+
"format": "float",
|
12 |
+
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
|
13 |
+
"example": "1.0",
|
14 |
+
"nullable": true
|
15 |
+
},
|
16 |
+
"logit_bias": {
|
17 |
"type": "array",
|
|
|
18 |
"items": {
|
19 |
+
"type": "number",
|
20 |
+
"format": "float"
|
21 |
+
},
|
22 |
+
"description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
|
23 |
+
"nullable": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
},
|
25 |
+
"logprobs": {
|
26 |
+
"type": "boolean",
|
27 |
+
"description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
|
28 |
+
"example": "false",
|
29 |
+
"nullable": true
|
30 |
},
|
31 |
"max_tokens": {
|
32 |
"type": "integer",
|
33 |
+
"format": "int32",
|
34 |
+
"description": "The maximum number of tokens that can be generated in the chat completion.",
|
35 |
+
"example": "32",
|
36 |
+
"nullable": true,
|
37 |
+
"minimum": 0
|
38 |
+
},
|
39 |
+
"messages": {
|
40 |
+
"type": "array",
|
41 |
+
"items": {
|
42 |
+
"$ref": "#/$defs/ChatCompletionInputMessage"
|
43 |
+
},
|
44 |
+
"description": "A list of messages comprising the conversation so far.",
|
45 |
+
"example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]"
|
46 |
+
},
|
47 |
+
"model": {
|
48 |
+
"type": "string",
|
49 |
+
"description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
|
50 |
+
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
51 |
+
},
|
52 |
+
"n": {
|
53 |
+
"type": "integer",
|
54 |
+
"format": "int32",
|
55 |
+
"description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
|
56 |
+
"example": "2",
|
57 |
+
"nullable": true,
|
58 |
+
"minimum": 0
|
59 |
+
},
|
60 |
+
"presence_penalty": {
|
61 |
+
"type": "number",
|
62 |
+
"format": "float",
|
63 |
+
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
|
64 |
+
"example": 0.1,
|
65 |
+
"nullable": true
|
66 |
},
|
67 |
"seed": {
|
68 |
"type": "integer",
|
69 |
+
"format": "int64",
|
70 |
+
"example": 42,
|
71 |
+
"nullable": true,
|
72 |
+
"minimum": 0
|
73 |
},
|
74 |
"stop": {
|
75 |
+
"type": "array",
|
76 |
+
"items": {
|
77 |
+
"type": "string"
|
78 |
+
},
|
79 |
+
"description": "Up to 4 sequences where the API will stop generating further tokens.",
|
80 |
+
"example": "null",
|
81 |
+
"nullable": true
|
82 |
},
|
83 |
"stream": {
|
84 |
+
"type": "boolean"
|
|
|
85 |
},
|
86 |
"temperature": {
|
87 |
"type": "number",
|
88 |
+
"format": "float",
|
89 |
+
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
|
90 |
+
"example": 1,
|
91 |
+
"nullable": true
|
92 |
+
},
|
93 |
+
"tool_choice": {
|
94 |
+
"allOf": [
|
95 |
+
{
|
96 |
+
"$ref": "#/$defs/ChatCompletionInputToolType"
|
97 |
+
}
|
98 |
+
],
|
99 |
+
"nullable": true
|
100 |
+
},
|
101 |
+
"tool_prompt": {
|
102 |
+
"type": "string",
|
103 |
+
"description": "A prompt to be appended before the tools",
|
104 |
+
"example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
|
105 |
+
"nullable": true
|
106 |
+
},
|
107 |
+
"tools": {
|
108 |
+
"type": "array",
|
109 |
+
"items": {
|
110 |
+
"$ref": "#/$defs/ChatCompletionInputTool"
|
111 |
+
},
|
112 |
+
"description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.",
|
113 |
+
"example": "null",
|
114 |
+
"nullable": true
|
115 |
+
},
|
116 |
+
"top_logprobs": {
|
117 |
+
"type": "integer",
|
118 |
+
"format": "int32",
|
119 |
+
"description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
|
120 |
+
"example": "5",
|
121 |
+
"nullable": true,
|
122 |
+
"minimum": 0
|
123 |
},
|
124 |
"top_p": {
|
125 |
"type": "number",
|
126 |
+
"format": "float",
|
127 |
+
"description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
|
128 |
+
"example": 0.95,
|
129 |
+
"nullable": true
|
130 |
}
|
131 |
},
|
132 |
+
"$defs": {
|
133 |
+
"ChatCompletionInputMessage": {
|
134 |
+
"type": "object",
|
135 |
+
"required": ["role"],
|
136 |
+
"properties": {
|
137 |
+
"content": {
|
138 |
+
"type": "string",
|
139 |
+
"example": "My name is David and I",
|
140 |
+
"nullable": true
|
141 |
+
},
|
142 |
+
"name": {
|
143 |
+
"type": "string",
|
144 |
+
"example": "\"David\"",
|
145 |
+
"nullable": true
|
146 |
+
},
|
147 |
+
"role": {
|
148 |
+
"type": "string",
|
149 |
+
"example": "user"
|
150 |
+
},
|
151 |
+
"tool_calls": {
|
152 |
+
"type": "array",
|
153 |
+
"items": {
|
154 |
+
"$ref": "#/$defs/ChatCompletionInputToolCall"
|
155 |
+
},
|
156 |
+
"nullable": true
|
157 |
+
}
|
158 |
+
},
|
159 |
+
"title": "ChatCompletionInputMessage"
|
160 |
+
},
|
161 |
+
"ChatCompletionInputToolCall": {
|
162 |
+
"type": "object",
|
163 |
+
"required": ["id", "type", "function"],
|
164 |
+
"properties": {
|
165 |
+
"function": {
|
166 |
+
"$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
|
167 |
+
},
|
168 |
+
"id": {
|
169 |
+
"type": "integer",
|
170 |
+
"format": "int32",
|
171 |
+
"minimum": 0
|
172 |
+
},
|
173 |
+
"type": {
|
174 |
+
"type": "string"
|
175 |
+
}
|
176 |
+
},
|
177 |
+
"title": "ChatCompletionInputToolCall"
|
178 |
+
},
|
179 |
+
"ChatCompletionInputFunctionDefinition": {
|
180 |
+
"type": "object",
|
181 |
+
"required": ["name", "arguments"],
|
182 |
+
"properties": {
|
183 |
+
"arguments": {},
|
184 |
+
"description": {
|
185 |
+
"type": "string",
|
186 |
+
"nullable": true
|
187 |
+
},
|
188 |
+
"name": {
|
189 |
+
"type": "string"
|
190 |
+
}
|
191 |
+
},
|
192 |
+
"title": "ChatCompletionInputFunctionDefinition"
|
193 |
+
},
|
194 |
+
"ChatCompletionInputToolType": {
|
195 |
+
"oneOf": [
|
196 |
+
{
|
197 |
+
"type": "object",
|
198 |
+
"required": ["FunctionName"],
|
199 |
+
"properties": {
|
200 |
+
"FunctionName": {
|
201 |
+
"type": "string"
|
202 |
+
}
|
203 |
+
}
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"type": "string",
|
207 |
+
"enum": ["OneOf"]
|
208 |
+
}
|
209 |
+
],
|
210 |
+
"title": "ChatCompletionInputToolType"
|
211 |
+
},
|
212 |
+
"ChatCompletionInputTool": {
|
213 |
+
"type": "object",
|
214 |
+
"required": ["type", "function"],
|
215 |
+
"properties": {
|
216 |
+
"function": {
|
217 |
+
"$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
|
218 |
+
},
|
219 |
+
"type": {
|
220 |
+
"type": "string",
|
221 |
+
"example": "function"
|
222 |
+
}
|
223 |
+
},
|
224 |
+
"title": "ChatCompletionInputTool"
|
225 |
}
|
226 |
}
|
227 |
}
|
packages/tasks/src/tasks/chat-completion/spec/output.json
CHANGED
@@ -1,58 +1,196 @@
|
|
1 |
{
|
2 |
"$id": "/inference/schemas/chat-completion/output.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
-
"description": "
|
5 |
"title": "ChatCompletionOutput",
|
6 |
"type": "object",
|
|
|
7 |
"properties": {
|
8 |
"choices": {
|
9 |
"type": "array",
|
10 |
-
"description": "A list of chat completion choices.",
|
11 |
-
"title": "ChatCompletionOutputChoice",
|
12 |
"items": {
|
13 |
-
"
|
14 |
-
"properties": {
|
15 |
-
"finish_reason": {
|
16 |
-
"$ref": "#/definitions/FinishReason",
|
17 |
-
"description": "The reason why the generation was stopped."
|
18 |
-
},
|
19 |
-
"index": {
|
20 |
-
"type": "integer",
|
21 |
-
"description": "The index of the choice in the list of choices."
|
22 |
-
},
|
23 |
-
"message": {
|
24 |
-
"type": "object",
|
25 |
-
"properties": {
|
26 |
-
"role": {
|
27 |
-
"$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
|
28 |
-
},
|
29 |
-
"content": {
|
30 |
-
"type": "string",
|
31 |
-
"description": "The content of the chat completion message."
|
32 |
-
}
|
33 |
-
},
|
34 |
-
"title": "ChatCompletionOutputChoiceMessage",
|
35 |
-
"required": ["content", "role"]
|
36 |
-
}
|
37 |
-
},
|
38 |
-
"required": ["finish_reason", "index", "message"]
|
39 |
}
|
40 |
},
|
41 |
"created": {
|
42 |
"type": "integer",
|
43 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
}
|
45 |
},
|
46 |
-
"
|
47 |
-
|
48 |
-
|
49 |
-
"
|
50 |
-
"
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
}
|
57 |
}
|
58 |
}
|
|
|
1 |
{
|
2 |
"$id": "/inference/schemas/chat-completion/output.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
+
"description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
5 |
"title": "ChatCompletionOutput",
|
6 |
"type": "object",
|
7 |
+
"required": ["id", "object", "created", "model", "system_fingerprint", "choices", "usage"],
|
8 |
"properties": {
|
9 |
"choices": {
|
10 |
"type": "array",
|
|
|
|
|
11 |
"items": {
|
12 |
+
"$ref": "#/$defs/ChatCompletionOutputComplete"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
}
|
14 |
},
|
15 |
"created": {
|
16 |
"type": "integer",
|
17 |
+
"format": "int64",
|
18 |
+
"example": "1706270835",
|
19 |
+
"minimum": 0
|
20 |
+
},
|
21 |
+
"id": {
|
22 |
+
"type": "string"
|
23 |
+
},
|
24 |
+
"model": {
|
25 |
+
"type": "string",
|
26 |
+
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
27 |
+
},
|
28 |
+
"object": {
|
29 |
+
"type": "string"
|
30 |
+
},
|
31 |
+
"system_fingerprint": {
|
32 |
+
"type": "string"
|
33 |
+
},
|
34 |
+
"usage": {
|
35 |
+
"$ref": "#/$defs/ChatCompletionOutputUsage"
|
36 |
}
|
37 |
},
|
38 |
+
"$defs": {
|
39 |
+
"ChatCompletionOutputComplete": {
|
40 |
+
"type": "object",
|
41 |
+
"required": ["index", "message", "finish_reason"],
|
42 |
+
"properties": {
|
43 |
+
"finish_reason": {
|
44 |
+
"type": "string"
|
45 |
+
},
|
46 |
+
"index": {
|
47 |
+
"type": "integer",
|
48 |
+
"format": "int32",
|
49 |
+
"minimum": 0
|
50 |
+
},
|
51 |
+
"logprobs": {
|
52 |
+
"allOf": [
|
53 |
+
{
|
54 |
+
"$ref": "#/$defs/ChatCompletionOutputLogprobs"
|
55 |
+
}
|
56 |
+
],
|
57 |
+
"nullable": true
|
58 |
+
},
|
59 |
+
"message": {
|
60 |
+
"$ref": "#/$defs/ChatCompletionOutputMessage"
|
61 |
+
}
|
62 |
+
},
|
63 |
+
"title": "ChatCompletionOutputComplete"
|
64 |
+
},
|
65 |
+
"ChatCompletionOutputLogprobs": {
|
66 |
+
"type": "object",
|
67 |
+
"required": ["content"],
|
68 |
+
"properties": {
|
69 |
+
"content": {
|
70 |
+
"type": "array",
|
71 |
+
"items": {
|
72 |
+
"$ref": "#/$defs/ChatCompletionOutputLogprob"
|
73 |
+
}
|
74 |
+
}
|
75 |
+
},
|
76 |
+
"title": "ChatCompletionOutputLogprobs"
|
77 |
+
},
|
78 |
+
"ChatCompletionOutputLogprob": {
|
79 |
+
"type": "object",
|
80 |
+
"required": ["token", "logprob", "top_logprobs"],
|
81 |
+
"properties": {
|
82 |
+
"logprob": {
|
83 |
+
"type": "number",
|
84 |
+
"format": "float"
|
85 |
+
},
|
86 |
+
"token": {
|
87 |
+
"type": "string"
|
88 |
+
},
|
89 |
+
"top_logprobs": {
|
90 |
+
"type": "array",
|
91 |
+
"items": {
|
92 |
+
"$ref": "#/$defs/ChatCompletionOutputTopLogprob"
|
93 |
+
}
|
94 |
+
}
|
95 |
+
},
|
96 |
+
"title": "ChatCompletionOutputLogprob"
|
97 |
+
},
|
98 |
+
"ChatCompletionOutputTopLogprob": {
|
99 |
+
"type": "object",
|
100 |
+
"required": ["token", "logprob"],
|
101 |
+
"properties": {
|
102 |
+
"logprob": {
|
103 |
+
"type": "number",
|
104 |
+
"format": "float"
|
105 |
+
},
|
106 |
+
"token": {
|
107 |
+
"type": "string"
|
108 |
+
}
|
109 |
+
},
|
110 |
+
"title": "ChatCompletionOutputTopLogprob"
|
111 |
+
},
|
112 |
+
"ChatCompletionOutputMessage": {
|
113 |
+
"type": "object",
|
114 |
+
"required": ["role"],
|
115 |
+
"properties": {
|
116 |
+
"content": {
|
117 |
+
"type": "string",
|
118 |
+
"example": "My name is David and I",
|
119 |
+
"nullable": true
|
120 |
+
},
|
121 |
+
"name": {
|
122 |
+
"type": "string",
|
123 |
+
"example": "\"David\"",
|
124 |
+
"nullable": true
|
125 |
+
},
|
126 |
+
"role": {
|
127 |
+
"type": "string",
|
128 |
+
"example": "user"
|
129 |
+
},
|
130 |
+
"tool_calls": {
|
131 |
+
"type": "array",
|
132 |
+
"items": {
|
133 |
+
"$ref": "#/$defs/ChatCompletionOutputToolCall"
|
134 |
+
},
|
135 |
+
"nullable": true
|
136 |
+
}
|
137 |
+
},
|
138 |
+
"title": "ChatCompletionOutputMessage"
|
139 |
+
},
|
140 |
+
"ChatCompletionOutputToolCall": {
|
141 |
+
"type": "object",
|
142 |
+
"required": ["id", "type", "function"],
|
143 |
+
"properties": {
|
144 |
+
"function": {
|
145 |
+
"$ref": "#/$defs/ChatCompletionOutputFunctionDefinition"
|
146 |
+
},
|
147 |
+
"id": {
|
148 |
+
"type": "integer",
|
149 |
+
"format": "int32",
|
150 |
+
"minimum": 0
|
151 |
+
},
|
152 |
+
"type": {
|
153 |
+
"type": "string"
|
154 |
+
}
|
155 |
+
},
|
156 |
+
"title": "ChatCompletionOutputToolCall"
|
157 |
+
},
|
158 |
+
"ChatCompletionOutputFunctionDefinition": {
|
159 |
+
"type": "object",
|
160 |
+
"required": ["name", "arguments"],
|
161 |
+
"properties": {
|
162 |
+
"arguments": {},
|
163 |
+
"description": {
|
164 |
+
"type": "string",
|
165 |
+
"nullable": true
|
166 |
+
},
|
167 |
+
"name": {
|
168 |
+
"type": "string"
|
169 |
+
}
|
170 |
+
},
|
171 |
+
"title": "ChatCompletionOutputFunctionDefinition"
|
172 |
+
},
|
173 |
+
"ChatCompletionOutputUsage": {
|
174 |
+
"type": "object",
|
175 |
+
"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
|
176 |
+
"properties": {
|
177 |
+
"completion_tokens": {
|
178 |
+
"type": "integer",
|
179 |
+
"format": "int32",
|
180 |
+
"minimum": 0
|
181 |
+
},
|
182 |
+
"prompt_tokens": {
|
183 |
+
"type": "integer",
|
184 |
+
"format": "int32",
|
185 |
+
"minimum": 0
|
186 |
+
},
|
187 |
+
"total_tokens": {
|
188 |
+
"type": "integer",
|
189 |
+
"format": "int32",
|
190 |
+
"minimum": 0
|
191 |
+
}
|
192 |
+
},
|
193 |
+
"title": "ChatCompletionOutputUsage"
|
194 |
}
|
195 |
}
|
196 |
}
|
packages/tasks/src/tasks/chat-completion/spec/output_stream.json
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"$id": "/inference/schemas/chat-completion/output_stream.json",
|
3 |
-
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
-
"description": "Chat Completion Stream Output",
|
5 |
-
"title": "ChatCompletionStreamOutput",
|
6 |
-
"type": "object",
|
7 |
-
"properties": {
|
8 |
-
"choices": {
|
9 |
-
"type": "array",
|
10 |
-
"title": "ChatCompletionStreamOutputChoice",
|
11 |
-
"description": "A list of chat completion choices.",
|
12 |
-
"items": {
|
13 |
-
"type": "object",
|
14 |
-
"properties": {
|
15 |
-
"delta": {
|
16 |
-
"type": "object",
|
17 |
-
"title": "ChatCompletionStreamOutputDelta",
|
18 |
-
"description": "A chat completion delta generated by streamed model responses.",
|
19 |
-
"properties": {
|
20 |
-
"content": {
|
21 |
-
"type": "string",
|
22 |
-
"description": "The contents of the chunk message."
|
23 |
-
},
|
24 |
-
"role": {
|
25 |
-
"type": "string",
|
26 |
-
"description": "The role of the author of this message."
|
27 |
-
}
|
28 |
-
}
|
29 |
-
},
|
30 |
-
"finish_reason": {
|
31 |
-
"$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
|
32 |
-
"description": "The reason why the generation was stopped."
|
33 |
-
},
|
34 |
-
"index": {
|
35 |
-
"type": "integer",
|
36 |
-
"description": "The index of the choice in the list of choices."
|
37 |
-
}
|
38 |
-
},
|
39 |
-
"required": ["delta", "index"]
|
40 |
-
}
|
41 |
-
},
|
42 |
-
"created": {
|
43 |
-
"type": "integer",
|
44 |
-
"description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
|
45 |
-
}
|
46 |
-
},
|
47 |
-
"required": ["choices", "created"]
|
48 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
packages/tasks/src/tasks/chat-completion/spec/stream_output.json
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"$id": "/inference/schemas/chat-completion/stream_output.json",
|
3 |
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
+
"description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
5 |
+
"title": "ChatCompletionStreamOutput",
|
6 |
+
"type": "object",
|
7 |
+
"required": ["id", "object", "created", "model", "system_fingerprint", "choices"],
|
8 |
+
"properties": {
|
9 |
+
"choices": {
|
10 |
+
"type": "array",
|
11 |
+
"items": {
|
12 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputChoice"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"created": {
|
16 |
+
"type": "integer",
|
17 |
+
"format": "int64",
|
18 |
+
"example": "1706270978",
|
19 |
+
"minimum": 0
|
20 |
+
},
|
21 |
+
"id": {
|
22 |
+
"type": "string"
|
23 |
+
},
|
24 |
+
"model": {
|
25 |
+
"type": "string",
|
26 |
+
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
27 |
+
},
|
28 |
+
"object": {
|
29 |
+
"type": "string"
|
30 |
+
},
|
31 |
+
"system_fingerprint": {
|
32 |
+
"type": "string"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"$defs": {
|
36 |
+
"ChatCompletionStreamOutputChoice": {
|
37 |
+
"type": "object",
|
38 |
+
"required": ["index", "delta"],
|
39 |
+
"properties": {
|
40 |
+
"delta": {
|
41 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputDelta"
|
42 |
+
},
|
43 |
+
"finish_reason": {
|
44 |
+
"type": "string",
|
45 |
+
"nullable": true
|
46 |
+
},
|
47 |
+
"index": {
|
48 |
+
"type": "integer",
|
49 |
+
"format": "int32",
|
50 |
+
"minimum": 0
|
51 |
+
},
|
52 |
+
"logprobs": {
|
53 |
+
"allOf": [
|
54 |
+
{
|
55 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputLogprobs"
|
56 |
+
}
|
57 |
+
],
|
58 |
+
"nullable": true
|
59 |
+
}
|
60 |
+
},
|
61 |
+
"title": "ChatCompletionStreamOutputChoice"
|
62 |
+
},
|
63 |
+
"ChatCompletionStreamOutputDelta": {
|
64 |
+
"type": "object",
|
65 |
+
"required": ["role"],
|
66 |
+
"properties": {
|
67 |
+
"content": {
|
68 |
+
"type": "string",
|
69 |
+
"example": "What is Deep Learning?",
|
70 |
+
"nullable": true
|
71 |
+
},
|
72 |
+
"role": {
|
73 |
+
"type": "string",
|
74 |
+
"example": "user"
|
75 |
+
},
|
76 |
+
"tool_calls": {
|
77 |
+
"allOf": [
|
78 |
+
{
|
79 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
|
80 |
+
}
|
81 |
+
],
|
82 |
+
"nullable": true
|
83 |
+
}
|
84 |
+
},
|
85 |
+
"title": "ChatCompletionStreamOutputDelta"
|
86 |
+
},
|
87 |
+
"ChatCompletionStreamOutputDeltaToolCall": {
|
88 |
+
"type": "object",
|
89 |
+
"required": ["index", "id", "type", "function"],
|
90 |
+
"properties": {
|
91 |
+
"function": {
|
92 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputFunction"
|
93 |
+
},
|
94 |
+
"id": {
|
95 |
+
"type": "string"
|
96 |
+
},
|
97 |
+
"index": {
|
98 |
+
"type": "integer",
|
99 |
+
"format": "int32",
|
100 |
+
"minimum": 0
|
101 |
+
},
|
102 |
+
"type": {
|
103 |
+
"type": "string"
|
104 |
+
}
|
105 |
+
},
|
106 |
+
"title": "ChatCompletionStreamOutputDeltaToolCall"
|
107 |
+
},
|
108 |
+
"ChatCompletionStreamOutputFunction": {
|
109 |
+
"type": "object",
|
110 |
+
"required": ["arguments"],
|
111 |
+
"properties": {
|
112 |
+
"arguments": {
|
113 |
+
"type": "string"
|
114 |
+
},
|
115 |
+
"name": {
|
116 |
+
"type": "string",
|
117 |
+
"nullable": true
|
118 |
+
}
|
119 |
+
},
|
120 |
+
"title": "ChatCompletionStreamOutputFunction"
|
121 |
+
},
|
122 |
+
"ChatCompletionStreamOutputLogprobs": {
|
123 |
+
"type": "object",
|
124 |
+
"required": ["content"],
|
125 |
+
"properties": {
|
126 |
+
"content": {
|
127 |
+
"type": "array",
|
128 |
+
"items": {
|
129 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputLogprob"
|
130 |
+
}
|
131 |
+
}
|
132 |
+
},
|
133 |
+
"title": "ChatCompletionStreamOutputLogprobs"
|
134 |
+
},
|
135 |
+
"ChatCompletionStreamOutputLogprob": {
|
136 |
+
"type": "object",
|
137 |
+
"required": ["token", "logprob", "top_logprobs"],
|
138 |
+
"properties": {
|
139 |
+
"logprob": {
|
140 |
+
"type": "number",
|
141 |
+
"format": "float"
|
142 |
+
},
|
143 |
+
"token": {
|
144 |
+
"type": "string"
|
145 |
+
},
|
146 |
+
"top_logprobs": {
|
147 |
+
"type": "array",
|
148 |
+
"items": {
|
149 |
+
"$ref": "#/$defs/ChatCompletionStreamOutputTopLogprob"
|
150 |
+
}
|
151 |
+
}
|
152 |
+
},
|
153 |
+
"title": "ChatCompletionStreamOutputLogprob"
|
154 |
+
},
|
155 |
+
"ChatCompletionStreamOutputTopLogprob": {
|
156 |
+
"type": "object",
|
157 |
+
"required": ["token", "logprob"],
|
158 |
+
"properties": {
|
159 |
+
"logprob": {
|
160 |
+
"type": "number",
|
161 |
+
"format": "float"
|
162 |
+
},
|
163 |
+
"token": {
|
164 |
+
"type": "string"
|
165 |
+
}
|
166 |
+
},
|
167 |
+
"title": "ChatCompletionStreamOutputTopLogprob"
|
168 |
+
}
|
169 |
+
}
|
170 |
+
}
|
packages/tasks/src/tasks/index.ts
CHANGED
@@ -43,9 +43,8 @@ export type {
|
|
43 |
ChatCompletionInput,
|
44 |
ChatCompletionInputMessage,
|
45 |
ChatCompletionOutput,
|
46 |
-
|
47 |
-
|
48 |
-
ChatCompletionOutputChoiceMessage,
|
49 |
ChatCompletionStreamOutput,
|
50 |
ChatCompletionStreamOutputChoice,
|
51 |
ChatCompletionStreamOutputDelta,
|
@@ -85,15 +84,15 @@ export type {
|
|
85 |
TextClassificationParameters,
|
86 |
} from "./text-classification/inference";
|
87 |
export type {
|
88 |
-
|
89 |
-
|
90 |
TextGenerationInput,
|
91 |
TextGenerationOutput,
|
92 |
TextGenerationOutputDetails,
|
93 |
-
|
94 |
-
|
95 |
TextGenerationOutputToken,
|
96 |
-
|
97 |
TextGenerationStreamOutput,
|
98 |
} from "./text-generation/inference";
|
99 |
export type * from "./video-classification/inference";
|
|
|
43 |
ChatCompletionInput,
|
44 |
ChatCompletionInputMessage,
|
45 |
ChatCompletionOutput,
|
46 |
+
ChatCompletionOutputComplete,
|
47 |
+
ChatCompletionOutputMessage,
|
|
|
48 |
ChatCompletionStreamOutput,
|
49 |
ChatCompletionStreamOutputChoice,
|
50 |
ChatCompletionStreamOutputDelta,
|
|
|
84 |
TextClassificationParameters,
|
85 |
} from "./text-classification/inference";
|
86 |
export type {
|
87 |
+
TextGenerationOutputFinishReason,
|
88 |
+
TextGenerationOutputPrefillToken,
|
89 |
TextGenerationInput,
|
90 |
TextGenerationOutput,
|
91 |
TextGenerationOutputDetails,
|
92 |
+
TextGenerationInputGenerateParameters,
|
93 |
+
TextGenerationOutputBestOfSequence,
|
94 |
TextGenerationOutputToken,
|
95 |
+
TextGenerationStreamOutputStreamDetails,
|
96 |
TextGenerationStreamOutput,
|
97 |
} from "./text-generation/inference";
|
98 |
export type * from "./video-classification/inference";
|
packages/tasks/src/tasks/text-generation/inference.ts
CHANGED
@@ -5,246 +5,134 @@
|
|
5 |
*/
|
6 |
|
7 |
/**
|
8 |
-
*
|
|
|
|
|
|
|
|
|
9 |
*/
|
10 |
export interface TextGenerationInput {
|
11 |
-
/**
|
12 |
-
* The text to initialize generation with
|
13 |
-
*/
|
14 |
inputs: string;
|
15 |
-
|
16 |
-
* Additional inference parameters
|
17 |
-
*/
|
18 |
-
parameters?: TextGenerationParameters;
|
19 |
-
/**
|
20 |
-
* Whether to stream output tokens
|
21 |
-
*/
|
22 |
stream?: boolean;
|
23 |
[property: string]: unknown;
|
24 |
}
|
25 |
|
26 |
-
|
27 |
-
* Additional inference parameters
|
28 |
-
*
|
29 |
-
* Additional inference parameters for Text Generation
|
30 |
-
*/
|
31 |
-
export interface TextGenerationParameters {
|
32 |
-
/**
|
33 |
-
* The number of sampling queries to run. Only the best one (in terms of total logprob) will
|
34 |
-
* be returned.
|
35 |
-
*/
|
36 |
best_of?: number;
|
37 |
-
/**
|
38 |
-
* Whether or not to output decoder input details
|
39 |
-
*/
|
40 |
decoder_input_details?: boolean;
|
41 |
-
/**
|
42 |
-
* Whether or not to output details
|
43 |
-
*/
|
44 |
details?: boolean;
|
45 |
-
/**
|
46 |
-
* Whether to use logits sampling instead of greedy decoding when generating new tokens.
|
47 |
-
*/
|
48 |
do_sample?: boolean;
|
49 |
-
|
50 |
-
|
51 |
-
*/
|
52 |
max_new_tokens?: number;
|
53 |
-
/**
|
54 |
-
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
|
55 |
-
* paper](https://hf.co/papers/1909.05858) for more details.
|
56 |
-
*/
|
57 |
repetition_penalty?: number;
|
58 |
-
/**
|
59 |
-
* Whether to prepend the prompt to the generated text.
|
60 |
-
*/
|
61 |
return_full_text?: boolean;
|
62 |
-
/**
|
63 |
-
* The random sampling seed.
|
64 |
-
*/
|
65 |
seed?: number;
|
66 |
-
|
67 |
-
* Stop generating tokens if a member of `stop_sequences` is generated.
|
68 |
-
*/
|
69 |
-
stop_sequences?: string[];
|
70 |
-
/**
|
71 |
-
* The value used to modulate the logits distribution.
|
72 |
-
*/
|
73 |
temperature?: number;
|
74 |
-
/**
|
75 |
-
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
|
76 |
-
*/
|
77 |
top_k?: number;
|
78 |
-
|
79 |
-
* If set to < 1, only the smallest set of most probable tokens with probabilities that add
|
80 |
-
* up to `top_p` or higher are kept for generation.
|
81 |
-
*/
|
82 |
top_p?: number;
|
83 |
-
/**
|
84 |
-
* Truncate input tokens to the given size.
|
85 |
-
*/
|
86 |
truncate?: number;
|
87 |
-
/**
|
88 |
-
* Typical Decoding mass. See [Typical Decoding for Natural Language
|
89 |
-
* Generation](https://hf.co/papers/2202.00666) for more information
|
90 |
-
*/
|
91 |
typical_p?: number;
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
/**
|
93 |
-
*
|
|
|
|
|
|
|
94 |
*/
|
95 |
-
|
96 |
[property: string]: unknown;
|
97 |
}
|
98 |
|
|
|
|
|
99 |
/**
|
100 |
-
*
|
|
|
|
|
|
|
|
|
101 |
*/
|
102 |
export interface TextGenerationOutput {
|
103 |
-
/**
|
104 |
-
* When enabled, details about the generation
|
105 |
-
*/
|
106 |
details?: TextGenerationOutputDetails;
|
107 |
-
/**
|
108 |
-
* The generated text
|
109 |
-
*/
|
110 |
generated_text: string;
|
111 |
[property: string]: unknown;
|
112 |
}
|
113 |
|
114 |
-
/**
|
115 |
-
* When enabled, details about the generation
|
116 |
-
*/
|
117 |
export interface TextGenerationOutputDetails {
|
118 |
-
|
119 |
-
|
120 |
-
*/
|
121 |
-
best_of_sequences?: TextGenerationOutputSequenceDetails[];
|
122 |
-
/**
|
123 |
-
* The reason why the generation was stopped.
|
124 |
-
*/
|
125 |
-
finish_reason: TextGenerationFinishReason;
|
126 |
-
/**
|
127 |
-
* The number of generated tokens
|
128 |
-
*/
|
129 |
generated_tokens: number;
|
130 |
-
prefill:
|
131 |
-
/**
|
132 |
-
* The random seed used for generation
|
133 |
-
*/
|
134 |
seed?: number;
|
135 |
-
/**
|
136 |
-
* The generated tokens and associated details
|
137 |
-
*/
|
138 |
tokens: TextGenerationOutputToken[];
|
139 |
-
/**
|
140 |
-
* Most likely tokens
|
141 |
-
*/
|
142 |
top_tokens?: Array<TextGenerationOutputToken[]>;
|
143 |
[property: string]: unknown;
|
144 |
}
|
145 |
|
146 |
-
export interface
|
147 |
-
finish_reason:
|
148 |
-
/**
|
149 |
-
* The generated text
|
150 |
-
*/
|
151 |
generated_text: string;
|
152 |
-
/**
|
153 |
-
* The number of generated tokens
|
154 |
-
*/
|
155 |
generated_tokens: number;
|
156 |
-
prefill:
|
157 |
-
/**
|
158 |
-
* The random seed used for generation
|
159 |
-
*/
|
160 |
seed?: number;
|
161 |
-
/**
|
162 |
-
* The generated tokens and associated details
|
163 |
-
*/
|
164 |
tokens: TextGenerationOutputToken[];
|
165 |
-
/**
|
166 |
-
* Most likely tokens
|
167 |
-
*/
|
168 |
top_tokens?: Array<TextGenerationOutputToken[]>;
|
169 |
[property: string]: unknown;
|
170 |
}
|
171 |
|
172 |
-
|
173 |
-
* The reason why the generation was stopped.
|
174 |
-
*
|
175 |
-
* length: The generated sequence reached the maximum allowed length
|
176 |
-
*
|
177 |
-
* eos_token: The model generated an end-of-sentence (EOS) token
|
178 |
-
*
|
179 |
-
* stop_sequence: One of the sequence in stop_sequences was generated
|
180 |
-
*/
|
181 |
-
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
|
182 |
|
183 |
-
export interface
|
184 |
id: number;
|
185 |
logprob: number;
|
186 |
-
/**
|
187 |
-
* The text associated with that token
|
188 |
-
*/
|
189 |
text: string;
|
190 |
[property: string]: unknown;
|
191 |
}
|
192 |
|
193 |
-
/**
|
194 |
-
* Generated token.
|
195 |
-
*/
|
196 |
export interface TextGenerationOutputToken {
|
197 |
id: number;
|
198 |
-
logprob
|
199 |
-
/**
|
200 |
-
* Whether or not that token is a special one
|
201 |
-
*/
|
202 |
special: boolean;
|
203 |
-
/**
|
204 |
-
* The text associated with that token
|
205 |
-
*/
|
206 |
text: string;
|
207 |
[property: string]: unknown;
|
208 |
}
|
209 |
|
210 |
/**
|
211 |
-
* Text Generation Stream Output
|
|
|
|
|
|
|
|
|
212 |
*/
|
213 |
export interface TextGenerationStreamOutput {
|
214 |
-
|
215 |
-
* Generation details. Only available when the generation is finished.
|
216 |
-
*/
|
217 |
-
details?: TextGenerationStreamDetails;
|
218 |
-
/**
|
219 |
-
* The complete generated text. Only available when the generation is finished.
|
220 |
-
*/
|
221 |
generated_text?: string;
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
index?: number;
|
226 |
-
/**
|
227 |
-
* Generated token.
|
228 |
-
*/
|
229 |
-
token: TextGenerationOutputToken;
|
230 |
[property: string]: unknown;
|
231 |
}
|
232 |
|
233 |
-
|
234 |
-
|
235 |
-
*/
|
236 |
-
export interface TextGenerationStreamDetails {
|
237 |
-
/**
|
238 |
-
* The reason why the generation was stopped.
|
239 |
-
*/
|
240 |
-
finish_reason: TextGenerationFinishReason;
|
241 |
-
/**
|
242 |
-
* The number of generated tokens
|
243 |
-
*/
|
244 |
generated_tokens: number;
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
249 |
[property: string]: unknown;
|
250 |
}
|
|
|
5 |
*/
|
6 |
|
7 |
/**
|
8 |
+
* Text Generation Input.
|
9 |
+
*
|
10 |
+
* Auto-generated from TGI specs.
|
11 |
+
* For more details, check out
|
12 |
+
* https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
|
13 |
*/
|
14 |
export interface TextGenerationInput {
|
|
|
|
|
|
|
15 |
inputs: string;
|
16 |
+
parameters?: TextGenerationInputGenerateParameters;
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
stream?: boolean;
|
18 |
[property: string]: unknown;
|
19 |
}
|
20 |
|
21 |
+
export interface TextGenerationInputGenerateParameters {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
best_of?: number;
|
|
|
|
|
|
|
23 |
decoder_input_details?: boolean;
|
|
|
|
|
|
|
24 |
details?: boolean;
|
|
|
|
|
|
|
25 |
do_sample?: boolean;
|
26 |
+
frequency_penalty?: number;
|
27 |
+
grammar?: TextGenerationInputGrammarType;
|
|
|
28 |
max_new_tokens?: number;
|
|
|
|
|
|
|
|
|
29 |
repetition_penalty?: number;
|
|
|
|
|
|
|
30 |
return_full_text?: boolean;
|
|
|
|
|
|
|
31 |
seed?: number;
|
32 |
+
stop?: string[];
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
temperature?: number;
|
|
|
|
|
|
|
34 |
top_k?: number;
|
35 |
+
top_n_tokens?: number;
|
|
|
|
|
|
|
36 |
top_p?: number;
|
|
|
|
|
|
|
37 |
truncate?: number;
|
|
|
|
|
|
|
|
|
38 |
typical_p?: number;
|
39 |
+
watermark?: boolean;
|
40 |
+
[property: string]: unknown;
|
41 |
+
}
|
42 |
+
|
43 |
+
export interface TextGenerationInputGrammarType {
|
44 |
+
type: Type;
|
45 |
/**
|
46 |
+
* A string that represents a [JSON Schema](https://json-schema.org/).
|
47 |
+
*
|
48 |
+
* JSON Schema is a declarative language that allows to annotate JSON documents
|
49 |
+
* with types and descriptions.
|
50 |
*/
|
51 |
+
value: unknown;
|
52 |
[property: string]: unknown;
|
53 |
}
|
54 |
|
55 |
+
export type Type = "json" | "regex";
|
56 |
+
|
57 |
/**
|
58 |
+
* Text Generation Output.
|
59 |
+
*
|
60 |
+
* Auto-generated from TGI specs.
|
61 |
+
* For more details, check out
|
62 |
+
* https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
|
63 |
*/
|
64 |
export interface TextGenerationOutput {
|
|
|
|
|
|
|
65 |
details?: TextGenerationOutputDetails;
|
|
|
|
|
|
|
66 |
generated_text: string;
|
67 |
[property: string]: unknown;
|
68 |
}
|
69 |
|
|
|
|
|
|
|
70 |
export interface TextGenerationOutputDetails {
|
71 |
+
best_of_sequences?: TextGenerationOutputBestOfSequence[];
|
72 |
+
finish_reason: TextGenerationOutputFinishReason;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
generated_tokens: number;
|
74 |
+
prefill: TextGenerationOutputPrefillToken[];
|
|
|
|
|
|
|
75 |
seed?: number;
|
|
|
|
|
|
|
76 |
tokens: TextGenerationOutputToken[];
|
|
|
|
|
|
|
77 |
top_tokens?: Array<TextGenerationOutputToken[]>;
|
78 |
[property: string]: unknown;
|
79 |
}
|
80 |
|
81 |
+
export interface TextGenerationOutputBestOfSequence {
|
82 |
+
finish_reason: TextGenerationOutputFinishReason;
|
|
|
|
|
|
|
83 |
generated_text: string;
|
|
|
|
|
|
|
84 |
generated_tokens: number;
|
85 |
+
prefill: TextGenerationOutputPrefillToken[];
|
|
|
|
|
|
|
86 |
seed?: number;
|
|
|
|
|
|
|
87 |
tokens: TextGenerationOutputToken[];
|
|
|
|
|
|
|
88 |
top_tokens?: Array<TextGenerationOutputToken[]>;
|
89 |
[property: string]: unknown;
|
90 |
}
|
91 |
|
92 |
+
export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
+
export interface TextGenerationOutputPrefillToken {
|
95 |
id: number;
|
96 |
logprob: number;
|
|
|
|
|
|
|
97 |
text: string;
|
98 |
[property: string]: unknown;
|
99 |
}
|
100 |
|
|
|
|
|
|
|
101 |
export interface TextGenerationOutputToken {
|
102 |
id: number;
|
103 |
+
logprob: number;
|
|
|
|
|
|
|
104 |
special: boolean;
|
|
|
|
|
|
|
105 |
text: string;
|
106 |
[property: string]: unknown;
|
107 |
}
|
108 |
|
109 |
/**
|
110 |
+
* Text Generation Stream Output.
|
111 |
+
*
|
112 |
+
* Auto-generated from TGI specs.
|
113 |
+
* For more details, check out
|
114 |
+
* https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
|
115 |
*/
|
116 |
export interface TextGenerationStreamOutput {
|
117 |
+
details?: TextGenerationStreamOutputStreamDetails;
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
generated_text?: string;
|
119 |
+
index: number;
|
120 |
+
token: TextGenerationStreamOutputToken;
|
121 |
+
top_tokens?: TextGenerationStreamOutputToken[];
|
|
|
|
|
|
|
|
|
|
|
122 |
[property: string]: unknown;
|
123 |
}
|
124 |
|
125 |
+
export interface TextGenerationStreamOutputStreamDetails {
|
126 |
+
finish_reason: TextGenerationOutputFinishReason;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
generated_tokens: number;
|
128 |
+
seed?: number;
|
129 |
+
[property: string]: unknown;
|
130 |
+
}
|
131 |
+
|
132 |
+
export interface TextGenerationStreamOutputToken {
|
133 |
+
id: number;
|
134 |
+
logprob: number;
|
135 |
+
special: boolean;
|
136 |
+
text: string;
|
137 |
[property: string]: unknown;
|
138 |
}
|
packages/tasks/src/tasks/text-generation/spec/input.json
CHANGED
@@ -1,94 +1,195 @@
|
|
1 |
{
|
2 |
"$id": "/inference/schemas/text-generation/input.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
-
"description": "
|
5 |
"title": "TextGenerationInput",
|
6 |
"type": "object",
|
|
|
7 |
"properties": {
|
8 |
"inputs": {
|
9 |
-
"
|
10 |
-
"
|
11 |
},
|
12 |
"parameters": {
|
13 |
-
"
|
14 |
-
"$ref": "#/$defs/TextGenerationParameters"
|
15 |
},
|
16 |
"stream": {
|
17 |
-
"
|
18 |
-
"
|
19 |
}
|
20 |
},
|
21 |
"$defs": {
|
22 |
-
"
|
23 |
-
"title": "TextGenerationParameters",
|
24 |
-
"description": "Additional inference parameters for Text Generation",
|
25 |
"type": "object",
|
26 |
"properties": {
|
27 |
"best_of": {
|
28 |
"type": "integer",
|
29 |
-
"
|
|
|
|
|
|
|
|
|
30 |
},
|
31 |
"decoder_input_details": {
|
32 |
"type": "boolean",
|
33 |
-
"
|
34 |
},
|
35 |
"details": {
|
36 |
"type": "boolean",
|
37 |
-
"
|
38 |
},
|
39 |
"do_sample": {
|
40 |
"type": "boolean",
|
41 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
},
|
43 |
"max_new_tokens": {
|
44 |
"type": "integer",
|
45 |
-
"
|
|
|
|
|
|
|
|
|
46 |
},
|
47 |
"repetition_penalty": {
|
48 |
"type": "number",
|
49 |
-
"
|
|
|
|
|
|
|
|
|
50 |
},
|
51 |
"return_full_text": {
|
52 |
"type": "boolean",
|
53 |
-
"
|
|
|
|
|
54 |
},
|
55 |
"seed": {
|
56 |
"type": "integer",
|
57 |
-
"
|
|
|
|
|
|
|
|
|
|
|
58 |
},
|
59 |
-
"
|
60 |
"type": "array",
|
61 |
"items": {
|
62 |
"type": "string"
|
63 |
},
|
64 |
-
"
|
|
|
65 |
},
|
66 |
"temperature": {
|
67 |
"type": "number",
|
68 |
-
"
|
|
|
|
|
|
|
|
|
69 |
},
|
70 |
"top_k": {
|
71 |
"type": "integer",
|
72 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
},
|
74 |
"top_p": {
|
75 |
"type": "number",
|
76 |
-
"
|
|
|
|
|
|
|
|
|
|
|
77 |
},
|
78 |
"truncate": {
|
79 |
"type": "integer",
|
80 |
-
"
|
|
|
|
|
|
|
81 |
},
|
82 |
"typical_p": {
|
83 |
"type": "number",
|
84 |
-
"
|
|
|
|
|
|
|
|
|
|
|
85 |
},
|
86 |
"watermark": {
|
87 |
"type": "boolean",
|
88 |
-
"
|
|
|
89 |
}
|
90 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
}
|
92 |
-
}
|
93 |
-
"required": ["inputs"]
|
94 |
}
|
|
|
1 |
{
|
2 |
"$id": "/inference/schemas/text-generation/input.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
+
"description": "Text Generation Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
5 |
"title": "TextGenerationInput",
|
6 |
"type": "object",
|
7 |
+
"required": ["inputs"],
|
8 |
"properties": {
|
9 |
"inputs": {
|
10 |
+
"type": "string",
|
11 |
+
"example": "My name is Olivier and I"
|
12 |
},
|
13 |
"parameters": {
|
14 |
+
"$ref": "#/$defs/TextGenerationInputGenerateParameters"
|
|
|
15 |
},
|
16 |
"stream": {
|
17 |
+
"type": "boolean",
|
18 |
+
"default": "false"
|
19 |
}
|
20 |
},
|
21 |
"$defs": {
|
22 |
+
"TextGenerationInputGenerateParameters": {
|
|
|
|
|
23 |
"type": "object",
|
24 |
"properties": {
|
25 |
"best_of": {
|
26 |
"type": "integer",
|
27 |
+
"default": "null",
|
28 |
+
"example": 1,
|
29 |
+
"nullable": true,
|
30 |
+
"minimum": 0,
|
31 |
+
"exclusiveMinimum": 0
|
32 |
},
|
33 |
"decoder_input_details": {
|
34 |
"type": "boolean",
|
35 |
+
"default": "false"
|
36 |
},
|
37 |
"details": {
|
38 |
"type": "boolean",
|
39 |
+
"default": "true"
|
40 |
},
|
41 |
"do_sample": {
|
42 |
"type": "boolean",
|
43 |
+
"default": "false",
|
44 |
+
"example": true
|
45 |
+
},
|
46 |
+
"frequency_penalty": {
|
47 |
+
"type": "number",
|
48 |
+
"format": "float",
|
49 |
+
"default": "null",
|
50 |
+
"example": 0.1,
|
51 |
+
"nullable": true,
|
52 |
+
"exclusiveMinimum": -2
|
53 |
+
},
|
54 |
+
"grammar": {
|
55 |
+
"allOf": [
|
56 |
+
{
|
57 |
+
"$ref": "#/$defs/TextGenerationInputGrammarType"
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"default": "null",
|
61 |
+
"nullable": true
|
62 |
},
|
63 |
"max_new_tokens": {
|
64 |
"type": "integer",
|
65 |
+
"format": "int32",
|
66 |
+
"default": "100",
|
67 |
+
"example": "20",
|
68 |
+
"nullable": true,
|
69 |
+
"minimum": 0
|
70 |
},
|
71 |
"repetition_penalty": {
|
72 |
"type": "number",
|
73 |
+
"format": "float",
|
74 |
+
"default": "null",
|
75 |
+
"example": 1.03,
|
76 |
+
"nullable": true,
|
77 |
+
"exclusiveMinimum": 0
|
78 |
},
|
79 |
"return_full_text": {
|
80 |
"type": "boolean",
|
81 |
+
"default": "null",
|
82 |
+
"example": false,
|
83 |
+
"nullable": true
|
84 |
},
|
85 |
"seed": {
|
86 |
"type": "integer",
|
87 |
+
"format": "int64",
|
88 |
+
"default": "null",
|
89 |
+
"example": "null",
|
90 |
+
"nullable": true,
|
91 |
+
"minimum": 0,
|
92 |
+
"exclusiveMinimum": 0
|
93 |
},
|
94 |
+
"stop": {
|
95 |
"type": "array",
|
96 |
"items": {
|
97 |
"type": "string"
|
98 |
},
|
99 |
+
"example": ["photographer"],
|
100 |
+
"maxItems": 4
|
101 |
},
|
102 |
"temperature": {
|
103 |
"type": "number",
|
104 |
+
"format": "float",
|
105 |
+
"default": "null",
|
106 |
+
"example": 0.5,
|
107 |
+
"nullable": true,
|
108 |
+
"exclusiveMinimum": 0
|
109 |
},
|
110 |
"top_k": {
|
111 |
"type": "integer",
|
112 |
+
"format": "int32",
|
113 |
+
"default": "null",
|
114 |
+
"example": 10,
|
115 |
+
"nullable": true,
|
116 |
+
"exclusiveMinimum": 0
|
117 |
+
},
|
118 |
+
"top_n_tokens": {
|
119 |
+
"type": "integer",
|
120 |
+
"format": "int32",
|
121 |
+
"default": "null",
|
122 |
+
"example": 5,
|
123 |
+
"nullable": true,
|
124 |
+
"minimum": 0,
|
125 |
+
"exclusiveMinimum": 0
|
126 |
},
|
127 |
"top_p": {
|
128 |
"type": "number",
|
129 |
+
"format": "float",
|
130 |
+
"default": "null",
|
131 |
+
"example": 0.95,
|
132 |
+
"nullable": true,
|
133 |
+
"maximum": 1,
|
134 |
+
"exclusiveMinimum": 0
|
135 |
},
|
136 |
"truncate": {
|
137 |
"type": "integer",
|
138 |
+
"default": "null",
|
139 |
+
"example": "null",
|
140 |
+
"nullable": true,
|
141 |
+
"minimum": 0
|
142 |
},
|
143 |
"typical_p": {
|
144 |
"type": "number",
|
145 |
+
"format": "float",
|
146 |
+
"default": "null",
|
147 |
+
"example": 0.95,
|
148 |
+
"nullable": true,
|
149 |
+
"maximum": 1,
|
150 |
+
"exclusiveMinimum": 0
|
151 |
},
|
152 |
"watermark": {
|
153 |
"type": "boolean",
|
154 |
+
"default": "false",
|
155 |
+
"example": true
|
156 |
}
|
157 |
+
},
|
158 |
+
"title": "TextGenerationInputGenerateParameters"
|
159 |
+
},
|
160 |
+
"TextGenerationInputGrammarType": {
|
161 |
+
"oneOf": [
|
162 |
+
{
|
163 |
+
"type": "object",
|
164 |
+
"required": ["type", "value"],
|
165 |
+
"properties": {
|
166 |
+
"type": {
|
167 |
+
"type": "string",
|
168 |
+
"enum": ["json"]
|
169 |
+
},
|
170 |
+
"value": {
|
171 |
+
"description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions."
|
172 |
+
}
|
173 |
+
}
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"type": "object",
|
177 |
+
"required": ["type", "value"],
|
178 |
+
"properties": {
|
179 |
+
"type": {
|
180 |
+
"type": "string",
|
181 |
+
"enum": ["regex"]
|
182 |
+
},
|
183 |
+
"value": {
|
184 |
+
"type": "string"
|
185 |
+
}
|
186 |
+
}
|
187 |
+
}
|
188 |
+
],
|
189 |
+
"discriminator": {
|
190 |
+
"propertyName": "type"
|
191 |
+
},
|
192 |
+
"title": "TextGenerationInputGrammarType"
|
193 |
}
|
194 |
+
}
|
|
|
195 |
}
|
packages/tasks/src/tasks/text-generation/spec/output.json
CHANGED
@@ -1,165 +1,179 @@
|
|
1 |
{
|
2 |
"$id": "/inference/schemas/text-generation/output.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
-
"description": "
|
5 |
"title": "TextGenerationOutput",
|
6 |
"type": "object",
|
|
|
7 |
"properties": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"generated_text": {
|
9 |
"type": "string",
|
10 |
-
"
|
11 |
-
},
|
12 |
-
"details": {
|
13 |
-
"$ref": "#/$defs/Details",
|
14 |
-
"description": "When enabled, details about the generation"
|
15 |
}
|
16 |
},
|
17 |
-
"required": ["generated_text"],
|
18 |
"$defs": {
|
19 |
-
"
|
20 |
-
"type": "string",
|
21 |
-
"title": "TextGenerationFinishReason",
|
22 |
-
"description": "The reason why the generation was stopped.",
|
23 |
-
"oneOf": [
|
24 |
-
{ "const": "length", "description": "length: The generated sequence reached the maximum allowed length" },
|
25 |
-
{ "const": "eos_token", "description": "eos_token: The model generated an end-of-sentence (EOS) token" },
|
26 |
-
{
|
27 |
-
"const": "stop_sequence",
|
28 |
-
"description": "stop_sequence: One of the sequence in stop_sequences was generated"
|
29 |
-
}
|
30 |
-
]
|
31 |
-
},
|
32 |
-
"PrefillToken": {
|
33 |
-
"title": "TextGenerationPrefillToken",
|
34 |
"type": "object",
|
|
|
35 |
"properties": {
|
36 |
-
"
|
37 |
-
"type": "
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
"text": {
|
43 |
-
"type": "string",
|
44 |
-
"description": "The text associated with that token"
|
45 |
-
}
|
46 |
-
},
|
47 |
-
"required": ["id", "logprob", "text"]
|
48 |
-
},
|
49 |
-
"Token": {
|
50 |
-
"type": "object",
|
51 |
-
"title": "TextGenerationOutputToken",
|
52 |
-
"properties": {
|
53 |
-
"id": {
|
54 |
-
"type": "integer"
|
55 |
-
},
|
56 |
-
"logprob": {
|
57 |
-
"type": "number"
|
58 |
-
},
|
59 |
-
"special": {
|
60 |
-
"type": "boolean",
|
61 |
-
"description": "Whether or not that token is a special one"
|
62 |
},
|
63 |
-
"text": {
|
64 |
-
"type": "string",
|
65 |
-
"description": "The text associated with that token"
|
66 |
-
}
|
67 |
-
},
|
68 |
-
"required": ["id", "special", "text"]
|
69 |
-
},
|
70 |
-
"Details": {
|
71 |
-
"type": "object",
|
72 |
-
"title": "TextGenerationOutputDetails",
|
73 |
-
"properties": {
|
74 |
"finish_reason": {
|
75 |
-
"$ref": "#/$defs/
|
76 |
-
"description": "The reason why the generation was stopped."
|
77 |
},
|
78 |
"generated_tokens": {
|
79 |
"type": "integer",
|
80 |
-
"
|
|
|
|
|
81 |
},
|
82 |
"prefill": {
|
83 |
"type": "array",
|
84 |
"items": {
|
85 |
-
"$ref": "#/$defs/
|
86 |
}
|
87 |
},
|
88 |
"seed": {
|
89 |
"type": "integer",
|
90 |
-
"
|
|
|
|
|
|
|
91 |
},
|
92 |
"tokens": {
|
93 |
"type": "array",
|
94 |
-
"description": "The generated tokens and associated details",
|
95 |
"items": {
|
96 |
-
"$ref": "#/$defs/
|
97 |
}
|
98 |
},
|
99 |
"top_tokens": {
|
100 |
"type": "array",
|
101 |
-
"description": "Most likely tokens",
|
102 |
"items": {
|
103 |
"type": "array",
|
104 |
"items": {
|
105 |
-
"$ref": "#/$defs/
|
106 |
}
|
107 |
}
|
108 |
-
},
|
109 |
-
"best_of_sequences": {
|
110 |
-
"type": "array",
|
111 |
-
"description": "Details about additional sequences when best_of is provided",
|
112 |
-
"items": {
|
113 |
-
"$ref": "#/$defs/SequenceDetails"
|
114 |
-
}
|
115 |
}
|
116 |
},
|
117 |
-
"
|
118 |
},
|
119 |
-
"
|
120 |
"type": "object",
|
121 |
-
"
|
122 |
"properties": {
|
|
|
|
|
|
|
123 |
"generated_text": {
|
124 |
"type": "string",
|
125 |
-
"
|
126 |
-
},
|
127 |
-
"finish_reason": {
|
128 |
-
"$ref": "#/$defs/FinishReason"
|
129 |
},
|
130 |
"generated_tokens": {
|
131 |
"type": "integer",
|
132 |
-
"
|
|
|
|
|
133 |
},
|
134 |
"prefill": {
|
135 |
"type": "array",
|
136 |
"items": {
|
137 |
-
"$ref": "#/$defs/
|
138 |
}
|
139 |
},
|
140 |
"seed": {
|
141 |
"type": "integer",
|
142 |
-
"
|
|
|
|
|
|
|
143 |
},
|
144 |
"tokens": {
|
145 |
"type": "array",
|
146 |
-
"description": "The generated tokens and associated details",
|
147 |
"items": {
|
148 |
-
"$ref": "#/$defs/
|
149 |
}
|
150 |
},
|
151 |
"top_tokens": {
|
152 |
"type": "array",
|
153 |
-
"description": "Most likely tokens",
|
154 |
"items": {
|
155 |
"type": "array",
|
156 |
"items": {
|
157 |
-
"$ref": "#/$defs/
|
158 |
}
|
159 |
}
|
160 |
}
|
161 |
},
|
162 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
}
|
164 |
}
|
165 |
}
|
|
|
1 |
{
|
2 |
"$id": "/inference/schemas/text-generation/output.json",
|
3 |
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
+
"description": "Text Generation Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
5 |
"title": "TextGenerationOutput",
|
6 |
"type": "object",
|
7 |
+
"required": ["generated_text"],
|
8 |
"properties": {
|
9 |
+
"details": {
|
10 |
+
"allOf": [
|
11 |
+
{
|
12 |
+
"$ref": "#/$defs/TextGenerationOutputDetails"
|
13 |
+
}
|
14 |
+
],
|
15 |
+
"nullable": true
|
16 |
+
},
|
17 |
"generated_text": {
|
18 |
"type": "string",
|
19 |
+
"example": "test"
|
|
|
|
|
|
|
|
|
20 |
}
|
21 |
},
|
|
|
22 |
"$defs": {
|
23 |
+
"TextGenerationOutputDetails": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
"type": "object",
|
25 |
+
"required": ["finish_reason", "generated_tokens", "prefill", "tokens"],
|
26 |
"properties": {
|
27 |
+
"best_of_sequences": {
|
28 |
+
"type": "array",
|
29 |
+
"items": {
|
30 |
+
"$ref": "#/$defs/TextGenerationOutputBestOfSequence"
|
31 |
+
},
|
32 |
+
"nullable": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
"finish_reason": {
|
35 |
+
"$ref": "#/$defs/TextGenerationOutputFinishReason"
|
|
|
36 |
},
|
37 |
"generated_tokens": {
|
38 |
"type": "integer",
|
39 |
+
"format": "int32",
|
40 |
+
"example": 1,
|
41 |
+
"minimum": 0
|
42 |
},
|
43 |
"prefill": {
|
44 |
"type": "array",
|
45 |
"items": {
|
46 |
+
"$ref": "#/$defs/TextGenerationOutputPrefillToken"
|
47 |
}
|
48 |
},
|
49 |
"seed": {
|
50 |
"type": "integer",
|
51 |
+
"format": "int64",
|
52 |
+
"example": 42,
|
53 |
+
"nullable": true,
|
54 |
+
"minimum": 0
|
55 |
},
|
56 |
"tokens": {
|
57 |
"type": "array",
|
|
|
58 |
"items": {
|
59 |
+
"$ref": "#/$defs/TextGenerationOutputToken"
|
60 |
}
|
61 |
},
|
62 |
"top_tokens": {
|
63 |
"type": "array",
|
|
|
64 |
"items": {
|
65 |
"type": "array",
|
66 |
"items": {
|
67 |
+
"$ref": "#/$defs/TextGenerationOutputToken"
|
68 |
}
|
69 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
}
|
71 |
},
|
72 |
+
"title": "TextGenerationOutputDetails"
|
73 |
},
|
74 |
+
"TextGenerationOutputBestOfSequence": {
|
75 |
"type": "object",
|
76 |
+
"required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"],
|
77 |
"properties": {
|
78 |
+
"finish_reason": {
|
79 |
+
"$ref": "#/$defs/TextGenerationOutputFinishReason"
|
80 |
+
},
|
81 |
"generated_text": {
|
82 |
"type": "string",
|
83 |
+
"example": "test"
|
|
|
|
|
|
|
84 |
},
|
85 |
"generated_tokens": {
|
86 |
"type": "integer",
|
87 |
+
"format": "int32",
|
88 |
+
"example": 1,
|
89 |
+
"minimum": 0
|
90 |
},
|
91 |
"prefill": {
|
92 |
"type": "array",
|
93 |
"items": {
|
94 |
+
"$ref": "#/$defs/TextGenerationOutputPrefillToken"
|
95 |
}
|
96 |
},
|
97 |
"seed": {
|
98 |
"type": "integer",
|
99 |
+
"format": "int64",
|
100 |
+
"example": 42,
|
101 |
+
"nullable": true,
|
102 |
+
"minimum": 0
|
103 |
},
|
104 |
"tokens": {
|
105 |
"type": "array",
|
|
|
106 |
"items": {
|
107 |
+
"$ref": "#/$defs/TextGenerationOutputToken"
|
108 |
}
|
109 |
},
|
110 |
"top_tokens": {
|
111 |
"type": "array",
|
|
|
112 |
"items": {
|
113 |
"type": "array",
|
114 |
"items": {
|
115 |
+
"$ref": "#/$defs/TextGenerationOutputToken"
|
116 |
}
|
117 |
}
|
118 |
}
|
119 |
},
|
120 |
+
"title": "TextGenerationOutputBestOfSequence"
|
121 |
+
},
|
122 |
+
"TextGenerationOutputFinishReason": {
|
123 |
+
"type": "string",
|
124 |
+
"enum": ["length", "eos_token", "stop_sequence"],
|
125 |
+
"example": "Length",
|
126 |
+
"title": "TextGenerationOutputFinishReason"
|
127 |
+
},
|
128 |
+
"TextGenerationOutputPrefillToken": {
|
129 |
+
"type": "object",
|
130 |
+
"required": ["id", "text", "logprob"],
|
131 |
+
"properties": {
|
132 |
+
"id": {
|
133 |
+
"type": "integer",
|
134 |
+
"format": "int32",
|
135 |
+
"example": 0,
|
136 |
+
"minimum": 0
|
137 |
+
},
|
138 |
+
"logprob": {
|
139 |
+
"type": "number",
|
140 |
+
"format": "float",
|
141 |
+
"example": -0.34,
|
142 |
+
"nullable": true
|
143 |
+
},
|
144 |
+
"text": {
|
145 |
+
"type": "string",
|
146 |
+
"example": "test"
|
147 |
+
}
|
148 |
+
},
|
149 |
+
"title": "TextGenerationOutputPrefillToken"
|
150 |
+
},
|
151 |
+
"TextGenerationOutputToken": {
|
152 |
+
"type": "object",
|
153 |
+
"required": ["id", "text", "logprob", "special"],
|
154 |
+
"properties": {
|
155 |
+
"id": {
|
156 |
+
"type": "integer",
|
157 |
+
"format": "int32",
|
158 |
+
"example": 0,
|
159 |
+
"minimum": 0
|
160 |
+
},
|
161 |
+
"logprob": {
|
162 |
+
"type": "number",
|
163 |
+
"format": "float",
|
164 |
+
"example": -0.34,
|
165 |
+
"nullable": true
|
166 |
+
},
|
167 |
+
"special": {
|
168 |
+
"type": "boolean",
|
169 |
+
"example": "false"
|
170 |
+
},
|
171 |
+
"text": {
|
172 |
+
"type": "string",
|
173 |
+
"example": "test"
|
174 |
+
}
|
175 |
+
},
|
176 |
+
"title": "TextGenerationOutputToken"
|
177 |
}
|
178 |
}
|
179 |
}
|
packages/tasks/src/tasks/text-generation/spec/output_stream.json
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"$id": "/inference/schemas/text-generation/output.json",
|
3 |
-
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
-
"description": "Text Generation Stream Output",
|
5 |
-
"title": "TextGenerationStreamOutput",
|
6 |
-
"type": "object",
|
7 |
-
"properties": {
|
8 |
-
"token": {
|
9 |
-
"$ref": "#/$defs/Token",
|
10 |
-
"description": "Generated token."
|
11 |
-
},
|
12 |
-
"index": {
|
13 |
-
"type": "integer",
|
14 |
-
"description": "The token index within the stream. Optional to support older clients that omit it."
|
15 |
-
},
|
16 |
-
"generated_text": {
|
17 |
-
"type": "string",
|
18 |
-
"description": "The complete generated text. Only available when the generation is finished."
|
19 |
-
},
|
20 |
-
"details": {
|
21 |
-
"$ref": "#/$defs/StreamDetails",
|
22 |
-
"description": "Generation details. Only available when the generation is finished."
|
23 |
-
}
|
24 |
-
},
|
25 |
-
"required": ["token"],
|
26 |
-
"$defs": {
|
27 |
-
"StreamDetails": {
|
28 |
-
"type": "object",
|
29 |
-
"title": "TextGenerationStreamDetails",
|
30 |
-
"properties": {
|
31 |
-
"finish_reason": {
|
32 |
-
"$ref": "#/$defs/FinishReason",
|
33 |
-
"description": "The reason why the generation was stopped."
|
34 |
-
},
|
35 |
-
"generated_tokens": {
|
36 |
-
"type": "integer",
|
37 |
-
"description": "The number of generated tokens"
|
38 |
-
},
|
39 |
-
"seed": {
|
40 |
-
"type": "integer",
|
41 |
-
"description": "The random seed used for generation"
|
42 |
-
}
|
43 |
-
},
|
44 |
-
"required": ["finish_reason", "generated_tokens", "seed"]
|
45 |
-
}
|
46 |
-
}
|
47 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
packages/tasks/src/tasks/text-generation/spec/stream_output.json
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"$id": "/inference/schemas/text-generation/stream_output.json",
|
3 |
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
4 |
+
"description": "Text Generation Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
5 |
+
"title": "TextGenerationStreamOutput",
|
6 |
+
"type": "object",
|
7 |
+
"required": ["index", "token"],
|
8 |
+
"properties": {
|
9 |
+
"details": {
|
10 |
+
"allOf": [
|
11 |
+
{
|
12 |
+
"$ref": "#/$defs/TextGenerationStreamOutputStreamDetails"
|
13 |
+
}
|
14 |
+
],
|
15 |
+
"default": "null",
|
16 |
+
"nullable": true
|
17 |
+
},
|
18 |
+
"generated_text": {
|
19 |
+
"type": "string",
|
20 |
+
"default": "null",
|
21 |
+
"example": "test",
|
22 |
+
"nullable": true
|
23 |
+
},
|
24 |
+
"index": {
|
25 |
+
"type": "integer",
|
26 |
+
"format": "int32",
|
27 |
+
"minimum": 0
|
28 |
+
},
|
29 |
+
"token": {
|
30 |
+
"$ref": "#/$defs/TextGenerationStreamOutputToken"
|
31 |
+
},
|
32 |
+
"top_tokens": {
|
33 |
+
"type": "array",
|
34 |
+
"items": {
|
35 |
+
"$ref": "#/$defs/TextGenerationStreamOutputToken"
|
36 |
+
}
|
37 |
+
}
|
38 |
+
},
|
39 |
+
"$defs": {
|
40 |
+
"TextGenerationStreamOutputStreamDetails": {
|
41 |
+
"type": "object",
|
42 |
+
"required": ["finish_reason", "generated_tokens"],
|
43 |
+
"properties": {
|
44 |
+
"finish_reason": {
|
45 |
+
"$ref": "#/$defs/TextGenerationStreamOutputFinishReason"
|
46 |
+
},
|
47 |
+
"generated_tokens": {
|
48 |
+
"type": "integer",
|
49 |
+
"format": "int32",
|
50 |
+
"example": 1,
|
51 |
+
"minimum": 0
|
52 |
+
},
|
53 |
+
"seed": {
|
54 |
+
"type": "integer",
|
55 |
+
"format": "int64",
|
56 |
+
"example": 42,
|
57 |
+
"nullable": true,
|
58 |
+
"minimum": 0
|
59 |
+
}
|
60 |
+
},
|
61 |
+
"title": "TextGenerationStreamOutputStreamDetails"
|
62 |
+
},
|
63 |
+
"TextGenerationStreamOutputFinishReason": {
|
64 |
+
"type": "string",
|
65 |
+
"enum": ["length", "eos_token", "stop_sequence"],
|
66 |
+
"example": "Length",
|
67 |
+
"title": "TextGenerationStreamOutputFinishReason"
|
68 |
+
},
|
69 |
+
"TextGenerationStreamOutputToken": {
|
70 |
+
"type": "object",
|
71 |
+
"required": ["id", "text", "logprob", "special"],
|
72 |
+
"properties": {
|
73 |
+
"id": {
|
74 |
+
"type": "integer",
|
75 |
+
"format": "int32",
|
76 |
+
"example": 0,
|
77 |
+
"minimum": 0
|
78 |
+
},
|
79 |
+
"logprob": {
|
80 |
+
"type": "number",
|
81 |
+
"format": "float",
|
82 |
+
"example": -0.34,
|
83 |
+
"nullable": true
|
84 |
+
},
|
85 |
+
"special": {
|
86 |
+
"type": "boolean",
|
87 |
+
"example": "false"
|
88 |
+
},
|
89 |
+
"text": {
|
90 |
+
"type": "string",
|
91 |
+
"example": "test"
|
92 |
+
}
|
93 |
+
},
|
94 |
+
"title": "TextGenerationStreamOutputToken"
|
95 |
+
}
|
96 |
+
}
|
97 |
+
}
|