Commit
·
d9b3577
1
Parent(s):
6b225ba
try to boost CPU usage
Browse files- public/index.html +1 -1
- src/index.mts +3 -3
public/index.html
CHANGED
@@ -39,7 +39,7 @@
|
|
39 |
</button>
|
40 |
<span class="py-3" x-show="state === 'loading'">Waiting for the stream to begin (might take a few minutes)..</span>
|
41 |
<span class="py-3" x-show="state === 'streaming'">
|
42 |
-
Streamed <span x-text="humanFileSize(size, true, 2)"></span> so far<br/> (hang on, this may take
|
43 |
</div>
|
44 |
</div>
|
45 |
</div>
|
|
|
39 |
</button>
|
40 |
<span class="py-3" x-show="state === 'loading'">Waiting for the stream to begin (might take a few minutes)..</span>
|
41 |
<span class="py-3" x-show="state === 'streaming'">
|
42 |
+
Streamed <span x-text="humanFileSize(size, true, 2)"></span> so far<br/> (hang on, this may take 5-15 minutes ☕)</span>
|
43 |
</div>
|
44 |
</div>
|
45 |
</div>
|
src/index.mts
CHANGED
@@ -37,7 +37,7 @@ const app = express()
|
|
37 |
const port = 7860
|
38 |
|
39 |
const minPromptSize = 16 // if you change this, you will need to also change in public/index.html
|
40 |
-
const timeoutInSec =
|
41 |
|
42 |
app.use(express.static("public"))
|
43 |
|
@@ -93,7 +93,7 @@ app.get("/app", async (req, res) => {
|
|
93 |
|
94 |
// naive implementation: we say we are out of capacity
|
95 |
if (pending.queue.length >= maxParallelRequests) {
|
96 |
-
res.write('Sorry, max nb of parallel requests reached. A new slot should be available in <
|
97 |
res.end()
|
98 |
return
|
99 |
}
|
@@ -139,7 +139,7 @@ ${prefix}`
|
|
139 |
|
140 |
const options = {
|
141 |
prompt: finalPrompt,
|
142 |
-
nThreads:
|
143 |
nTokPredict: 1024,
|
144 |
topK: 40,
|
145 |
topP: 0.1,
|
|
|
37 |
const port = 7860
|
38 |
|
39 |
const minPromptSize = 16 // if you change this, you will need to also change in public/index.html
|
40 |
+
const timeoutInSec = 15 * 60
|
41 |
|
42 |
app.use(express.static("public"))
|
43 |
|
|
|
93 |
|
94 |
// naive implementation: we say we are out of capacity
|
95 |
if (pending.queue.length >= maxParallelRequests) {
|
96 |
+
res.write('Sorry, max nb of parallel requests reached. A new slot should be available in < 15 min.')
|
97 |
res.end()
|
98 |
return
|
99 |
}
|
|
|
139 |
|
140 |
const options = {
|
141 |
prompt: finalPrompt,
|
142 |
+
nThreads: 6, // try to use the most of our vCPUs
|
143 |
nTokPredict: 1024,
|
144 |
topK: 40,
|
145 |
topP: 0.1,
|