Paul Dufour
committed on
Commit
·
ee4ce56
1
Parent(s):
246f1f5
Update demo
Browse files
Makefile
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
run:
|
2 |
+
static-web-server -p 3000 --cors-allow-origins "*" --cors-allow-headers "*" --cors-expose-headers "*" -d . -e false -z
|
index.html
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
<!DOCTYPE html>
|
2 |
<html lang="en">
|
3 |
-
|
4 |
<head>
|
5 |
<meta charset="UTF-8" />
|
6 |
<link rel="stylesheet" href="style.css" />
|
7 |
-
|
8 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
9 |
<title>Vision Demo</title>
|
10 |
-
|
11 |
{
|
12 |
"imports": {
|
13 |
"@huggingface/transformers": "https://esm.run/@huggingface/transformers"
|
@@ -15,32 +13,39 @@
|
|
15 |
}
|
16 |
</script>
|
17 |
</head>
|
18 |
-
|
19 |
<body>
|
20 |
<h1>Vision Demo</h1>
|
21 |
<div style="text-align: center">Everything runs directly in your browser (no server required)</div>
|
22 |
<form id="form">
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
<
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
</form>
|
39 |
-
|
40 |
-
<script src="index.js" type="module">
|
41 |
-
document.addEventListener('DOMContentLoaded', async () => {
|
42 |
-
await import('./index.js');
|
43 |
-
});
|
44 |
-
</script>
|
45 |
</body>
|
46 |
</html>
|
|
|
1 |
<!DOCTYPE html>
|
2 |
<html lang="en">
|
|
|
3 |
<head>
|
4 |
<meta charset="UTF-8" />
|
5 |
<link rel="stylesheet" href="style.css" />
|
|
|
6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
7 |
<title>Vision Demo</title>
|
8 |
+
<script type="importmap">
|
9 |
{
|
10 |
"imports": {
|
11 |
"@huggingface/transformers": "https://esm.run/@huggingface/transformers"
|
|
|
13 |
}
|
14 |
</script>
|
15 |
</head>
|
|
|
16 |
<body>
|
17 |
<h1>Vision Demo</h1>
|
18 |
<div style="text-align: center">Everything runs directly in your browser (no server required)</div>
|
19 |
<form id="form">
|
20 |
+
<div>
|
21 |
+
<select id="dtype-select">
|
22 |
+
<option value=""></option>
|
23 |
+
<option value="fp32">fp32</option>
|
24 |
+
<option value="fp16">fp16</option>
|
25 |
+
<option value="q8">q8</option>
|
26 |
+
<option value="int8">int8</option>
|
27 |
+
<option value="uint8">uint8</option>
|
28 |
+
<option value="q4">q4</option>
|
29 |
+
<option value="bnb4">bnb4</option>
|
30 |
+
<option value="q4f16">q4f16</option>
|
31 |
+
</select>
|
32 |
+
<button id="load-model" type="button">Load Model</button>
|
33 |
+
</div>
|
34 |
+
<label id="container" for="upload">
|
35 |
+
<div id="thumb">
|
36 |
+
<svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
|
37 |
+
<path fill="#000" d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"></path>
|
38 |
+
</svg>
|
39 |
+
Click to upload image
|
40 |
+
<label id="example">(or try example)</label>
|
41 |
+
</div>
|
42 |
+
<div>
|
43 |
+
<input type="text" placeholder="Add your prompt and hit enter" disabled>
|
44 |
+
</div>
|
45 |
+
</label>
|
46 |
+
<input id="upload" type="file" accept="image/*" disabled />
|
47 |
+
<div id="llm-output"></div>
|
48 |
</form>
|
49 |
+
<script src="index.js" type="module"></script>
|
|
|
|
|
|
|
|
|
|
|
50 |
</body>
|
51 |
</html>
|
index.js
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
|
2 |
|
3 |
-
|
4 |
const EXAMPLE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg";
|
5 |
|
6 |
const exampleButton = document.getElementById('example');
|
@@ -10,6 +9,9 @@ const thumb = document.getElementById('thumb');
|
|
10 |
const uploadInput = document.getElementById('upload');
|
11 |
const form = document.getElementById('form');
|
12 |
const output = document.getElementById('llm-output');
|
|
|
|
|
|
|
13 |
|
14 |
let currentImage = '';
|
15 |
let currentQuery = '';
|
@@ -18,15 +20,24 @@ let processor;
|
|
18 |
let model;
|
19 |
|
20 |
async function initializeSessions() {
|
21 |
-
|
|
|
22 |
container.classList.add('disabled');
|
23 |
|
24 |
processor = await AutoProcessor.from_pretrained(model_id);
|
25 |
-
model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id, { dtype: 'q4f16', device: 'webgpu' });
|
26 |
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
30 |
uploadInput.disabled = false;
|
31 |
promptInput.disabled = false;
|
32 |
container.classList.remove('disabled');
|
@@ -34,25 +45,25 @@ async function initializeSessions() {
|
|
34 |
|
35 |
async function handleQuery(imageUrl, query) {
|
36 |
try {
|
37 |
-
|
38 |
|
39 |
const result = await imageTextToText(imageUrl, query, (out) => {
|
40 |
console.log({ out });
|
41 |
output.textContent = out;
|
42 |
});
|
|
|
|
|
43 |
} catch (err) {
|
44 |
-
|
45 |
console.error(err);
|
46 |
}
|
47 |
}
|
48 |
|
49 |
-
|
50 |
-
export async function imageTextToText(
|
51 |
imagePath,
|
52 |
query,
|
53 |
cb,
|
54 |
) {
|
55 |
-
|
56 |
const image = await (await RawImage.read(imagePath)).resize(448, 448);
|
57 |
const conversation = [
|
58 |
{
|
@@ -92,7 +103,11 @@ async function updatePreview(url) {
|
|
92 |
thumb.innerHTML = '';
|
93 |
}
|
94 |
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
|
97 |
// UI Event Handlers
|
98 |
exampleButton.addEventListener('click', (e) => {
|
@@ -121,7 +136,10 @@ form.addEventListener('submit', (e) => {
|
|
121 |
e.preventDefault();
|
122 |
|
123 |
if (!currentImage || !currentQuery) {
|
124 |
-
|
|
|
|
|
|
|
125 |
} else {
|
126 |
promptInput.disabled = true;
|
127 |
uploadInput.disabled = true;
|
|
|
1 |
import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
|
2 |
|
|
|
3 |
const EXAMPLE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg";
|
4 |
|
5 |
const exampleButton = document.getElementById('example');
|
|
|
9 |
const uploadInput = document.getElementById('upload');
|
10 |
const form = document.getElementById('form');
|
11 |
const output = document.getElementById('llm-output');
|
12 |
+
const dtypeSelect = document.getElementById('dtype-select');
|
13 |
+
const loadModelButton = document.getElementById('load-model');
|
14 |
+
const container = document.getElementById('container');
|
15 |
|
16 |
let currentImage = '';
|
17 |
let currentQuery = '';
|
|
|
20 |
let model;
|
21 |
|
22 |
async function initializeSessions() {
|
23 |
+
loadModelButton.textContent = 'Loading Model...';
|
24 |
+
loadModelButton.classList.add('loading');
|
25 |
container.classList.add('disabled');
|
26 |
|
27 |
processor = await AutoProcessor.from_pretrained(model_id);
|
|
|
28 |
|
29 |
+
const dtype = dtypeSelect.value;
|
30 |
+
const options = { device: 'webgpu', };
|
31 |
+
if (dtype) {
|
32 |
+
options.dtype = dtype;
|
33 |
+
}
|
34 |
+
model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id, options);
|
35 |
+
|
36 |
+
loadModelButton.textContent = 'Model Ready';
|
37 |
+
loadModelButton.classList.remove('loading');
|
38 |
+
loadModelButton.classList.add('ready');
|
39 |
|
40 |
+
dtypeSelect.disabled = true;
|
41 |
uploadInput.disabled = false;
|
42 |
promptInput.disabled = false;
|
43 |
container.classList.remove('disabled');
|
|
|
45 |
|
46 |
async function handleQuery(imageUrl, query) {
|
47 |
try {
|
48 |
+
loadModelButton.textContent = 'Processing...';
|
49 |
|
50 |
const result = await imageTextToText(imageUrl, query, (out) => {
|
51 |
console.log({ out });
|
52 |
output.textContent = out;
|
53 |
});
|
54 |
+
|
55 |
+
loadModelButton.textContent = 'Model Ready';
|
56 |
} catch (err) {
|
57 |
+
loadModelButton.textContent = 'Error';
|
58 |
console.error(err);
|
59 |
}
|
60 |
}
|
61 |
|
62 |
+
async function imageTextToText(
|
|
|
63 |
imagePath,
|
64 |
query,
|
65 |
cb,
|
66 |
) {
|
|
|
67 |
const image = await (await RawImage.read(imagePath)).resize(448, 448);
|
68 |
const conversation = [
|
69 |
{
|
|
|
103 |
thumb.innerHTML = '';
|
104 |
}
|
105 |
|
106 |
+
loadModelButton.addEventListener('click', async () => {
|
107 |
+
dtypeSelect.disabled = true;
|
108 |
+
loadModelButton.disabled = true;
|
109 |
+
await initializeSessions();
|
110 |
+
});
|
111 |
|
112 |
// UI Event Handlers
|
113 |
exampleButton.addEventListener('click', (e) => {
|
|
|
136 |
e.preventDefault();
|
137 |
|
138 |
if (!currentImage || !currentQuery) {
|
139 |
+
loadModelButton.textContent = 'Please select an image and type a prompt';
|
140 |
+
setTimeout(() => {
|
141 |
+
loadModelButton.textContent = 'Model Ready';
|
142 |
+
}, 2000);
|
143 |
} else {
|
144 |
promptInput.disabled = true;
|
145 |
uploadInput.disabled = true;
|
style.css
CHANGED
@@ -148,3 +148,91 @@ input[type="text"]:focus {
|
|
148 |
#container.disabled #thumb svg {
|
149 |
opacity: 0.4;
|
150 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
#container.disabled #thumb svg {
|
149 |
opacity: 0.4;
|
150 |
}
|
151 |
+
|
152 |
+
#dtype-select {
|
153 |
+
width: 100%;
|
154 |
+
margin: 10px 0 0;
|
155 |
+
padding: 8px 12px;
|
156 |
+
font-size: 14px;
|
157 |
+
border: 1px solid #ddd;
|
158 |
+
border-radius: 6px;
|
159 |
+
background-color: #fff;
|
160 |
+
cursor: pointer;
|
161 |
+
margin-bottom: 10px;
|
162 |
+
outline: none;
|
163 |
+
transition: border-color 0.2s, box-shadow 0.2s;
|
164 |
+
}
|
165 |
+
|
166 |
+
#dtype-select:hover {
|
167 |
+
border-color: #bbb;
|
168 |
+
}
|
169 |
+
|
170 |
+
#dtype-select:focus {
|
171 |
+
border-color: #2196F3;
|
172 |
+
box-shadow: 0 0 0 2px rgba(33, 150, 243, 0.1);
|
173 |
+
}
|
174 |
+
|
175 |
+
#dtype-select:disabled {
|
176 |
+
background-color: #f5f5f5;
|
177 |
+
cursor: not-allowed;
|
178 |
+
opacity: 0.7;
|
179 |
+
}
|
180 |
+
|
181 |
+
#load-model {
|
182 |
+
width: 100%;
|
183 |
+
padding: 12px 20px;
|
184 |
+
font-size: 14px;
|
185 |
+
font-weight: 600;
|
186 |
+
background-color: #2196F3;
|
187 |
+
color: white;
|
188 |
+
border: none;
|
189 |
+
border-radius: 6px;
|
190 |
+
cursor: pointer;
|
191 |
+
transition: all 0.2s ease;
|
192 |
+
box-shadow: 0 2px 4px rgba(33, 150, 243, 0.2);
|
193 |
+
text-transform: uppercase;
|
194 |
+
letter-spacing: 0.5px;
|
195 |
+
margin: 0 0 15px;
|
196 |
+
position: relative;
|
197 |
+
}
|
198 |
+
|
199 |
+
#load-model:hover:not(:disabled) {
|
200 |
+
background-color: #1976D2;
|
201 |
+
box-shadow: 0 4px 8px rgba(33, 150, 243, 0.3);
|
202 |
+
transform: translateY(-1px);
|
203 |
+
}
|
204 |
+
|
205 |
+
#load-model:active:not(:disabled) {
|
206 |
+
transform: translateY(1px);
|
207 |
+
box-shadow: 0 1px 2px rgba(33, 150, 243, 0.2);
|
208 |
+
}
|
209 |
+
|
210 |
+
#load-model.loading {
|
211 |
+
color: transparent;
|
212 |
+
cursor: wait;
|
213 |
+
background-color: #90CAF9;
|
214 |
+
}
|
215 |
+
|
216 |
+
#load-model.loading::after {
|
217 |
+
content: "";
|
218 |
+
position: absolute;
|
219 |
+
width: 16px;
|
220 |
+
height: 16px;
|
221 |
+
top: 50%;
|
222 |
+
left: 50%;
|
223 |
+
margin-left: -8px;
|
224 |
+
margin-top: -8px;
|
225 |
+
border: 2px solid rgba(255, 255, 255, 0.3);
|
226 |
+
border-radius: 50%;
|
227 |
+
border-top-color: white;
|
228 |
+
animation: spin 1s linear infinite;
|
229 |
+
}
|
230 |
+
|
231 |
+
#load-model.ready {
|
232 |
+
background-color: #4CAF50;
|
233 |
+
cursor: default;
|
234 |
+
}
|
235 |
+
|
236 |
+
@keyframes spin {
|
237 |
+
to { transform: rotate(360deg); }
|
238 |
+
}
|