File size: 3,335 Bytes
01f754d b802856 d98e36f 01f754d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
/**
 * Send one embedding request to `worker` and wait for its final reply.
 *
 * The request is posted as a single message containing the three URLs, the
 * model id and the sentences. The returned promise then settles as follows:
 *   - a message whose data has an `error` property rejects with
 *     `new Error(data.error)`;
 *   - a message whose data has `status === "complete"` resolves with that data;
 *   - an `error` event on the worker itself rejects, so a crashed worker
 *     cannot leave the caller waiting forever.
 * Every message received while the request is pending is also forwarded to
 * `updateStatus`, including the final one.
 *
 * @param {object} worker - Object exposing `postMessage`, `addEventListener`
 *   and `removeEventListener` (e.g. a Web Worker).
 * @param {string} weightsURL - Forwarded to the worker unchanged.
 * @param {string} tokenizerURL - Forwarded to the worker unchanged.
 * @param {string} configURL - Forwarded to the worker unchanged.
 * @param {string} modelID - Forwarded to the worker unchanged.
 * @param {*} sentences - Forwarded to the worker unchanged.
 * @param {?function(*): void} [updateStatus=null] - Optional callback invoked
 *   with the data of every message received.
 * @returns {Promise<*>} Resolves with the data of the "complete" message.
 */
export async function getEmbeddings(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  sentences,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    // Detach both listeners once the request has settled so repeated calls on
    // the same worker do not accumulate handlers.
    function cleanup() {
      worker.removeEventListener("message", messageHandler);
      worker.removeEventListener("error", errorHandler);
    }

    function messageHandler(event) {
      const data = event.data;
      // The `in` operator throws on primitives and null, so guard first.
      const isObject = data !== null && typeof data === "object";
      if (isObject && "error" in data) {
        cleanup();
        reject(new Error(data.error));
      }
      if (isObject && data.status === "complete") {
        cleanup();
        resolve(data);
      }
      if (updateStatus) updateStatus(data);
    }

    // Fired when the worker itself fails rather than reporting an error
    // through a regular message.
    function errorHandler(event) {
      cleanup();
      const detail = event && event.message ? event.message : "Worker error";
      reject(new Error(detail));
    }

    // Listen before posting so that no reply can be missed.
    worker.addEventListener("message", messageHandler);
    worker.addEventListener("error", errorHandler);

    try {
      worker.postMessage({
        weightsURL,
        tokenizerURL,
        configURL,
        modelID,
        sentences,
      });
    } catch (err) {
      // The request never left; do not leave listeners behind.
      cleanup();
      reject(err);
    }
  });
}
// Registry of supported embedding models, keyed by model id.
// Each entry holds:
//   base_url        - URL prefix the model's files are resolved against
//                     (always ends with "/").
//   search_prefix   - text prefix stored for search queries.
//   document_prefix - text prefix stored for documents.
// The E5 entries use the "query: " / "passage: " prefixes, each with a single
// trailing space; the remaining models use empty prefixes.
// NOTE(review): several base URLs point at a `refs/pr/N` revision rather than
// `main` — presumably that is where the needed files live; confirm before
// changing them.
const MODELS = {
  intfloat_e5_small_v2: {
    base_url: "https://huggingface.co/intfloat/e5-small-v2/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  intfloat_e5_base_v2: {
    base_url: "https://huggingface.co/intfloat/e5-base-v2/resolve/main/",
    search_prefix: "query: ",
    // Trailing space matters: it matches the other E5 entries.
    document_prefix: "passage: ",
  },
  intfloat_multilingual_e5_small: {
    base_url:
      "https://huggingface.co/intfloat/multilingual-e5-small/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  sentence_transformers_all_MiniLM_L6_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/refs%2Fpr%2F21/",
    search_prefix: "",
    document_prefix: "",
  },
  sentence_transformers_all_MiniLM_L12_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/refs%2Fpr%2F4/",
    search_prefix: "",
    document_prefix: "",
  },
  gte_tiny: {
    base_url: "https://huggingface.co/TaylorAI/gte-tiny/resolve/refs%2Fpr%2F2/",
    search_prefix: "",
    document_prefix: "",
  },
  bge_micro: {
    base_url: "https://huggingface.co/TaylorAI/bge-micro/resolve/refs%2Fpr%2F1/",
    search_prefix: "",
    document_prefix: "",
  },
};
/**
 * Look up a model in MODELS and derive the URLs of its files.
 *
 * @param {string} id - Key of an entry in MODELS.
 * @returns {{modelURL: string, configURL: string, tokenizerURL: string,
 *   search_prefix: string, document_prefix: string}} The entry's base URL
 *   joined with each file name, plus the entry's two prefixes unchanged.
 * @throws {TypeError} If MODELS has no entry for `id`.
 */
export function getModelInfo(id) {
  const { base_url, search_prefix, document_prefix } = MODELS[id];
  // Every file lives directly under the model's base URL.
  const urlFor = (fileName) => base_url + fileName;

  return {
    modelURL: urlFor("model.safetensors"),
    configURL: urlFor("config.json"),
    tokenizerURL: urlFor("tokenizer.json"),
    search_prefix,
    document_prefix,
  };
}
/**
 * Cosine similarity of two numeric vectors: dot(vec1, vec2) / (|vec1| * |vec2|).
 *
 * The vectors are expected to have the same length; no length check is
 * performed. When either vector has zero magnitude (including empty input)
 * the similarity is undefined mathematically; 0 is returned instead of NaN so
 * the result can still be compared and sorted.
 *
 * @param {ArrayLike<number> & Iterable<number>} vec1 - First vector.
 * @param {ArrayLike<number> & Iterable<number>} vec2 - Second vector.
 * @returns {number} The cosine similarity, or 0 for a zero-magnitude vector.
 */
export function cosineSimilarity(vec1, vec2) {
  let dot = 0;
  let sumSquares1 = 0;
  for (let i = 0; i < vec1.length; i++) {
    dot += vec1[i] * vec2[i];
    sumSquares1 += vec1[i] * vec1[i];
  }

  let sumSquares2 = 0;
  for (const value of vec2) {
    sumSquares2 += value * value;
  }

  const denominator = Math.sqrt(sumSquares1) * Math.sqrt(sumSquares2);
  // Avoid 0/0 = NaN for zero vectors.
  if (denominator === 0) {
    return 0;
  }
  return dot / denominator;
}
/**
 * Fetch the plain-text extract of an English Wikipedia article.
 *
 * The title is passed through URLSearchParams so that characters such as
 * `&`, `#`, `?` or `+` in it are encoded instead of corrupting the query.
 *
 * Failures (network error, non-OK HTTP status, unexpected response shape,
 * missing or empty extract) are logged with console.error and the function
 * resolves to `undefined` rather than rejecting.
 *
 * @param {string} article - Title of the article to fetch.
 * @returns {Promise<string|undefined>} The extract text, or `undefined` on
 *   any failure.
 */
export async function getWikiText(article) {
  // thanks to wikipedia for the API
  const url = new URL("https://en.wikipedia.org/w/api.php");
  const params = {
    action: "query",
    prop: "extracts",
    exlimit: "1",
    titles: article,
    explaintext: "1",
    exsectionformat: "plain",
    format: "json",
    origin: "*",
  };
  for (const [key, value] of Object.entries(params)) {
    url.searchParams.set(key, value);
  }

  try {
    const response = await fetch(url.href, {
      method: "GET",
      headers: {
        Accept: "application/json",
      },
    });
    if (!response.ok) {
      throw new Error(`Wikipedia request failed with status ${response.status}`);
    }
    const data = await response.json();
    // Only one title is requested, so the first page entry is the article.
    const pages = data.query.pages;
    const pageId = Object.keys(pages)[0];
    const extract = pages[pageId].extract;
    if (extract === undefined || extract === "") {
      throw new Error("No article found");
    }
    return extract;
  } catch (error) {
    // Best effort by design: report the problem and yield undefined.
    console.error("Error:", error);
    return undefined;
  }
}
|