Paul Dufour committed on
Commit
ee4ce56
·
1 Parent(s): 246f1f5

Update demo

Browse files
Files changed (4) hide show
  1. Makefile +2 -0
  2. index.html +30 -25
  3. index.js +30 -12
  4. style.css +88 -0
Makefile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ run:
2
+ static-web-server -p 3000 --cors-allow-origins "*" --cors-allow-headers "*" --cors-expose-headers "*" -d . -e false -z
index.html CHANGED
@@ -1,13 +1,11 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
3
-
4
  <head>
5
  <meta charset="UTF-8" />
6
  <link rel="stylesheet" href="style.css" />
7
-
8
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
9
  <title>Vision Demo</title>
10
- <script type="importmap">
11
  {
12
  "imports": {
13
  "@huggingface/transformers": "https://esm.run/@huggingface/[email protected]"
@@ -15,32 +13,39 @@
15
  }
16
  </script>
17
  </head>
18
-
19
  <body>
20
  <h1>Vision Demo</h1>
21
  <div style="text-align: center">Everything runs directly in your browser (no server required)</div>
22
  <form id="form">
23
- <label id="status">Loading model...</label>
24
- <label id="container" for="upload">
25
- <div id="thumb">
26
- <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
27
- <path fill="#000" d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"></path>
28
- </svg>
29
- Click to upload image
30
- <label id="example">(or try example)</label>
31
- </div>
32
- <div>
33
- <input type="text" placeholder="Add your prompt and hit enter" disabled>
34
- </div>
35
- </label>
36
- <input id="upload" type="file" accept="image/*" disabled />
37
- <div id="llm-output"></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  </form>
39
-
40
- <script src="index.js" type="module">
41
- document.addEventListener('DOMContentLoaded', async () => {
42
- await import('./index.js');
43
- });
44
- </script>
45
  </body>
46
  </html>
 
1
  <!DOCTYPE html>
2
  <html lang="en">
 
3
  <head>
4
  <meta charset="UTF-8" />
5
  <link rel="stylesheet" href="style.css" />
 
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
  <title>Vision Demo</title>
8
+ <script type="importmap">
9
  {
10
  "imports": {
11
  "@huggingface/transformers": "https://esm.run/@huggingface/[email protected]"
 
13
  }
14
  </script>
15
  </head>
 
16
  <body>
17
  <h1>Vision Demo</h1>
18
  <div style="text-align: center">Everything runs directly in your browser (no server required)</div>
19
  <form id="form">
20
+ <div>
21
+ <select id="dtype-select">
22
+ <option value=""></option>
23
+ <option value="fp32">fp32</option>
24
+ <option value="fp16">fp16</option>
25
+ <option value="q8">q8</option>
26
+ <option value="int8">int8</option>
27
+ <option value="uint8">uint8</option>
28
+ <option value="q4">q4</option>
29
+ <option value="bnb4">bnb4</option>
30
+ <option value="q4f16">q4f16</option>
31
+ </select>
32
+ <button id="load-model" type="button">Load Model</button>
33
+ </div>
34
+ <label id="container" for="upload">
35
+ <div id="thumb">
36
+ <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
37
+ <path fill="#000" d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"></path>
38
+ </svg>
39
+ Click to upload image
40
+ <label id="example">(or try example)</label>
41
+ </div>
42
+ <div>
43
+ <input type="text" placeholder="Add your prompt and hit enter" disabled>
44
+ </div>
45
+ </label>
46
+ <input id="upload" type="file" accept="image/*" disabled />
47
+ <div id="llm-output"></div>
48
  </form>
49
+ <script src="index.js" type="module"></script>
 
 
 
 
 
50
  </body>
51
  </html>
index.js CHANGED
@@ -1,6 +1,5 @@
1
  import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
2
 
3
-
4
  const EXAMPLE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg";
5
 
6
  const exampleButton = document.getElementById('example');
@@ -10,6 +9,9 @@ const thumb = document.getElementById('thumb');
10
  const uploadInput = document.getElementById('upload');
11
  const form = document.getElementById('form');
12
  const output = document.getElementById('llm-output');
 
 
 
13
 
14
  let currentImage = '';
15
  let currentQuery = '';
@@ -18,15 +20,24 @@ let processor;
18
  let model;
19
 
20
  async function initializeSessions() {
21
- status.textContent = 'Loading model...';
 
22
  container.classList.add('disabled');
23
 
24
  processor = await AutoProcessor.from_pretrained(model_id);
25
- model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id, { dtype: 'q4f16', device: 'webgpu' });
26
 
27
- status.textContent = 'Ready';
28
- status.classList.add('ready');
 
 
 
 
 
 
 
 
29
 
 
30
  uploadInput.disabled = false;
31
  promptInput.disabled = false;
32
  container.classList.remove('disabled');
@@ -34,25 +45,25 @@ async function initializeSessions() {
34
 
35
  async function handleQuery(imageUrl, query) {
36
  try {
37
- status.textContent = 'Analyzing...';
38
 
39
  const result = await imageTextToText(imageUrl, query, (out) => {
40
  console.log({ out });
41
  output.textContent = out;
42
  });
 
 
43
  } catch (err) {
44
- status.textContent = 'Error processing request';
45
  console.error(err);
46
  }
47
  }
48
 
49
-
50
- export async function imageTextToText(
51
  imagePath,
52
  query,
53
  cb,
54
  ) {
55
-
56
  const image = await (await RawImage.read(imagePath)).resize(448, 448);
57
  const conversation = [
58
  {
@@ -92,7 +103,11 @@ async function updatePreview(url) {
92
  thumb.innerHTML = '';
93
  }
94
 
95
- await initializeSessions();
 
 
 
 
96
 
97
  // UI Event Handlers
98
  exampleButton.addEventListener('click', (e) => {
@@ -121,7 +136,10 @@ form.addEventListener('submit', (e) => {
121
  e.preventDefault();
122
 
123
  if (!currentImage || !currentQuery) {
124
- status.textContent = 'Please select an image and type a prompt';
 
 
 
125
  } else {
126
  promptInput.disabled = true;
127
  uploadInput.disabled = true;
 
1
  import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
2
 
 
3
  const EXAMPLE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg";
4
 
5
  const exampleButton = document.getElementById('example');
 
9
  const uploadInput = document.getElementById('upload');
10
  const form = document.getElementById('form');
11
  const output = document.getElementById('llm-output');
12
+ const dtypeSelect = document.getElementById('dtype-select');
13
+ const loadModelButton = document.getElementById('load-model');
14
+ const container = document.getElementById('container');
15
 
16
  let currentImage = '';
17
  let currentQuery = '';
 
20
  let model;
21
 
22
  async function initializeSessions() {
23
+ loadModelButton.textContent = 'Loading Model...';
24
+ loadModelButton.classList.add('loading');
25
  container.classList.add('disabled');
26
 
27
  processor = await AutoProcessor.from_pretrained(model_id);
 
28
 
29
+ const dtype = dtypeSelect.value;
30
+ const options = { device: 'webgpu', };
31
+ if (dtype) {
32
+ options.dtype = dtype;
33
+ }
34
+ model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id, options);
35
+
36
+ loadModelButton.textContent = 'Model Ready';
37
+ loadModelButton.classList.remove('loading');
38
+ loadModelButton.classList.add('ready');
39
 
40
+ dtypeSelect.disabled = true;
41
  uploadInput.disabled = false;
42
  promptInput.disabled = false;
43
  container.classList.remove('disabled');
 
45
 
46
  async function handleQuery(imageUrl, query) {
47
  try {
48
+ loadModelButton.textContent = 'Processing...';
49
 
50
  const result = await imageTextToText(imageUrl, query, (out) => {
51
  console.log({ out });
52
  output.textContent = out;
53
  });
54
+
55
+ loadModelButton.textContent = 'Model Ready';
56
  } catch (err) {
57
+ loadModelButton.textContent = 'Error';
58
  console.error(err);
59
  }
60
  }
61
 
62
+ async function imageTextToText(
 
63
  imagePath,
64
  query,
65
  cb,
66
  ) {
 
67
  const image = await (await RawImage.read(imagePath)).resize(448, 448);
68
  const conversation = [
69
  {
 
103
  thumb.innerHTML = '';
104
  }
105
 
106
+ loadModelButton.addEventListener('click', async () => {
107
+ dtypeSelect.disabled = true;
108
+ loadModelButton.disabled = true;
109
+ await initializeSessions();
110
+ });
111
 
112
  // UI Event Handlers
113
  exampleButton.addEventListener('click', (e) => {
 
136
  e.preventDefault();
137
 
138
  if (!currentImage || !currentQuery) {
139
+ loadModelButton.textContent = 'Please select an image and type a prompt';
140
+ setTimeout(() => {
141
+ loadModelButton.textContent = 'Model Ready';
142
+ }, 2000);
143
  } else {
144
  promptInput.disabled = true;
145
  uploadInput.disabled = true;
style.css CHANGED
@@ -148,3 +148,91 @@ input[type="text"]:focus {
148
  #container.disabled #thumb svg {
149
  opacity: 0.4;
150
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  #container.disabled #thumb svg {
149
  opacity: 0.4;
150
  }
151
+
152
+ #dtype-select {
153
+ width: 100%;
154
+ margin: 10px 0 0;
155
+ padding: 8px 12px;
156
+ font-size: 14px;
157
+ border: 1px solid #ddd;
158
+ border-radius: 6px;
159
+ background-color: #fff;
160
+ cursor: pointer;
161
+ margin-bottom: 10px;
162
+ outline: none;
163
+ transition: border-color 0.2s, box-shadow 0.2s;
164
+ }
165
+
166
+ #dtype-select:hover {
167
+ border-color: #bbb;
168
+ }
169
+
170
+ #dtype-select:focus {
171
+ border-color: #2196F3;
172
+ box-shadow: 0 0 0 2px rgba(33, 150, 243, 0.1);
173
+ }
174
+
175
+ #dtype-select:disabled {
176
+ background-color: #f5f5f5;
177
+ cursor: not-allowed;
178
+ opacity: 0.7;
179
+ }
180
+
181
+ #load-model {
182
+ width: 100%;
183
+ padding: 12px 20px;
184
+ font-size: 14px;
185
+ font-weight: 600;
186
+ background-color: #2196F3;
187
+ color: white;
188
+ border: none;
189
+ border-radius: 6px;
190
+ cursor: pointer;
191
+ transition: all 0.2s ease;
192
+ box-shadow: 0 2px 4px rgba(33, 150, 243, 0.2);
193
+ text-transform: uppercase;
194
+ letter-spacing: 0.5px;
195
+ margin: 0 0 15px;
196
+ position: relative;
197
+ }
198
+
199
+ #load-model:hover:not(:disabled) {
200
+ background-color: #1976D2;
201
+ box-shadow: 0 4px 8px rgba(33, 150, 243, 0.3);
202
+ transform: translateY(-1px);
203
+ }
204
+
205
+ #load-model:active:not(:disabled) {
206
+ transform: translateY(1px);
207
+ box-shadow: 0 1px 2px rgba(33, 150, 243, 0.2);
208
+ }
209
+
210
+ #load-model.loading {
211
+ color: transparent;
212
+ cursor: wait;
213
+ background-color: #90CAF9;
214
+ }
215
+
216
+ #load-model.loading::after {
217
+ content: "";
218
+ position: absolute;
219
+ width: 16px;
220
+ height: 16px;
221
+ top: 50%;
222
+ left: 50%;
223
+ margin-left: -8px;
224
+ margin-top: -8px;
225
+ border: 2px solid rgba(255, 255, 255, 0.3);
226
+ border-radius: 50%;
227
+ border-top-color: white;
228
+ animation: spin 1s linear infinite;
229
+ }
230
+
231
+ #load-model.ready {
232
+ background-color: #4CAF50;
233
+ cursor: default;
234
+ }
235
+
236
+ @keyframes spin {
237
+ to { transform: rotate(360deg); }
238
+ }