Add support for transformers.js (#6)
Browse files
- Add support for transformers.js (8eea22cb12cc5d205be48fc8120523d887f4abfe)
Co-authored-by: Joshua <[email protected]>
README.md
CHANGED
@@ -8,6 +8,7 @@ tags:
|
|
8 |
- mteb
|
9 |
- arctic
|
10 |
- snowflake-arctic-embed
|
|
|
11 |
model-index:
|
12 |
- name: snowflake-arctic-m-long
|
13 |
results:
|
@@ -3020,6 +3021,37 @@ If you use the long context model with more than 2048 tokens, ensure that you in
|
|
3020 |
model = AutoModel.from_pretrained('Snowflake/snowflake-arctic-embed-m-long', trust_remote_code=True, rotary_scaling_factor=2)
|
3021 |
```
|
3022 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3023 |
|
3024 |
## FAQ
|
3025 |
|
|
|
8 |
- mteb
|
9 |
- arctic
|
10 |
- snowflake-arctic-embed
|
11 |
+
- transformers.js
|
12 |
model-index:
|
13 |
- name: snowflake-arctic-m-long
|
14 |
results:
|
|
|
3021 |
model = AutoModel.from_pretrained('Snowflake/snowflake-arctic-embed-m-long', trust_remote_code=True, rotary_scaling_factor=2)
|
3022 |
```
|
3023 |
|
3024 |
+
### Using Transformers.js
|
3025 |
+
|
3026 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [npm](https://www.npmjs.com/package/@xenova/transformers) by running:
|
3027 |
+
```bash
|
3028 |
+
npm i @xenova/transformers
|
3029 |
+
```
|
3030 |
+
|
3031 |
+
You can then use the model to compute embeddings as follows:
|
3032 |
+
|
3033 |
+
```js
|
3034 |
+
import { pipeline, dot } from '@xenova/transformers';
|
3035 |
+
|
3036 |
+
// Create feature extraction pipeline
|
3037 |
+
const extractor = await pipeline('feature-extraction', 'Snowflake/snowflake-arctic-embed-m-long', {
|
3038 |
+
quantized: false, // Comment out this line to use the quantized version
|
3039 |
+
});
|
3040 |
+
|
3041 |
+
// Generate sentence embeddings
|
3042 |
+
const sentences = [
|
3043 |
+
'Represent this sentence for searching relevant passages: Where can I get the best tacos?',
|
3044 |
+
'The Data Cloud!',
|
3045 |
+
'Mexico City of Course!',
|
3046 |
+
];
|
3047 |
+
const output = await extractor(sentences, { normalize: true, pooling: 'cls' });
|
3048 |
+
|
3049 |
+
// Compute similarity scores
|
3050 |
+
const [source_embeddings, ...document_embeddings] = output.tolist();
|
3051 |
+
const similarities = document_embeddings.map(x => dot(source_embeddings, x));
|
3052 |
+
console.log(similarities); // [0.36740492125676116, 0.42407774292046635]
|
3053 |
+
```
|
3054 |
+
|
3055 |
|
3056 |
## FAQ
|
3057 |
|