// coref/dist/script.js
// Last commit 83140f2 (verified) by julien-c (HF staff):
// "Hardcode static outputs to get rid of API"
/**
 * Precomputed coreference results, keyed by the exact input sentence.
 *
 * While this map is non-empty, `Coref.parse()` renders one of these entries
 * instead of querying the endpoint; text that is not a key falls back to the
 * first entry.
 *
 * Fields of each value, as used by the code in this file:
 *  - cleanedText:  the text handed to `Displacy.render()`.
 *  - mentions:     spans to highlight. `start`/`end` are used as slice offsets
 *                  into `cleanedText`, `type` is lowercased into the
 *                  `data-entity` attribute, and `index` becomes `data-index`
 *                  and is the key into `singleScores` / `pairScores`.
 *  - singleScores: one score per mention index (null for mention 0), shown
 *                  next to the mention.
 *  - pairScores:   pairScores[from][to] is the score drawn on the arrow
 *                  between mention `from` and mention `to`.
 *  - corefResText, coreferences, cleanedContext, isResolved: not read by the
 *    code visible in this file. Judging from the data, `corefResText` is the
 *    text with resolved pronouns substituted and `coreferences` lists those
 *    original/resolved pairs.
 */
const COREF_HARDCODED_OUTPUTS = new Map([
[
`I love my father and my mother. They work hard. She is always nice but he is sometimes rude.`,
{
cleanedText: "I love my father and my mother. They work hard. She is always nice but he is sometimes rude.",
corefResText: "I love my father and my mother. They work hard. my mother is always nice but he is sometimes rude.",
coreferences: [
{
resolved: "my mother",
original: "She",
},
],
mentions: [
{
index: 0,
type: "PRONOMINAL",
end: 1,
start: 0,
startToken: 0,
text: "I",
utterance: 0,
endToken: 1,
},
{
index: 1,
type: "PRONOMINAL",
end: 9,
start: 7,
startToken: 2,
text: "my",
utterance: 0,
endToken: 3,
},
{
index: 2,
type: "LIST",
end: 30,
start: 7,
startToken: 2,
text: "my father and my mother",
utterance: 0,
endToken: 7,
},
{
index: 3,
type: "NOMINAL",
end: 16,
start: 7,
startToken: 2,
text: "my father",
utterance: 0,
endToken: 4,
},
{
index: 4,
type: "PRONOMINAL",
end: 23,
start: 21,
startToken: 5,
text: "my",
utterance: 0,
endToken: 6,
},
{
index: 5,
type: "NOMINAL",
end: 30,
start: 21,
startToken: 5,
text: "my mother",
utterance: 0,
endToken: 7,
},
{
index: 6,
type: "PRONOMINAL",
end: 36,
start: 32,
startToken: 8,
text: "They",
utterance: 0,
endToken: 9,
},
{
index: 7,
type: "PRONOMINAL",
end: 51,
start: 48,
startToken: 12,
text: "She",
utterance: 0,
endToken: 13,
},
{
index: 8,
type: "PRONOMINAL",
end: 73,
start: 71,
startToken: 17,
text: "he",
utterance: 0,
endToken: 18,
},
],
singleScores: {
"0": null,
"1": -0.5316791573216993,
"2": 1.5312658152183065,
"3": 1.555544240226482,
"4": -1.3024868053674725,
"5": 1.8934082334098847,
"6": -0.9070692483062055,
"7": 0.052800267274213275,
"8": -0.14679673122527748,
},
pairScores: {
"0": {},
"1": {
"0": 15.268471128411697,
},
"2": {
"0": -4.4230603182609896,
"1": -3.382446088190441,
},
"3": {
"0": -4.2304546215104555,
"1": -3.5681677896088786,
"2": -1.9470202037608262,
},
"4": {
"0": 11.654522570777317,
"1": 13.455601870537567,
"2": -3.218918301345336,
"3": -3.882381584104524,
},
"5": {
"0": -3.9280501278811983,
"1": -4.426880262361277,
"2": -1.7714693884323367,
"3": -2.722532370602323,
"4": -3.290164176455163,
},
"6": {
"0": -4.492101447800931,
"1": -4.636904674331316,
"2": 3.1158072056943666,
"3": -2.7375757747875573,
"4": -4.689981185699828,
"5": -2.6728186848475537,
},
"7": {
"0": -3.197215354228037,
"1": -3.538538702704479,
"2": -0.02408947507481729,
"3": 0.3052410603657605,
"4": -3.519641485034609,
"5": 1.8101046215415115,
"6": -3.1353342036917917,
},
"8": {
"0": -0.08532621450323319,
"1": 5.371002989344198,
"2": -1.4091179987286686,
"3": 3.152921411948177,
"4": 2.268706305216419,
"5": -2.340743897439996,
"6": -2.8835496283480597,
"7": -3.4832126005315334,
},
},
cleanedContext: "",
isResolved: true,
},
],
[
`My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.`,
{
cleanedText: "My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.",
corefResText: "My sister is swimming with my sister classmates. her classmates are not bad, but my sister is better. I love watching my sister swim.",
coreferences: [
{
resolved: "My sister",
original: "she",
},
{
resolved: "My sister",
original: "her",
},
{
resolved: "her classmates",
original: "They",
},
{
resolved: "My sister",
original: "her",
},
],
mentions: [
{
index: 0,
type: "PRONOMINAL",
end: 2,
start: 0,
startToken: 0,
text: "My",
utterance: 0,
endToken: 1,
},
{
index: 1,
type: "NOMINAL",
end: 9,
start: 0,
startToken: 0,
text: "My sister",
utterance: 0,
endToken: 2,
},
{
index: 2,
type: "PRONOMINAL",
end: 30,
start: 27,
startToken: 5,
text: "her",
utterance: 0,
endToken: 6,
},
{
index: 3,
type: "NOMINAL",
end: 41,
start: 27,
startToken: 5,
text: "her classmates",
utterance: 0,
endToken: 7,
},
{
index: 4,
type: "PRONOMINAL",
end: 47,
start: 43,
startToken: 8,
text: "They",
utterance: 0,
endToken: 9,
},
{
index: 5,
type: "PRONOMINAL",
end: 68,
start: 65,
startToken: 14,
text: "she",
utterance: 0,
endToken: 15,
},
{
index: 6,
type: "PRONOMINAL",
end: 81,
start: 80,
startToken: 18,
text: "I",
utterance: 0,
endToken: 19,
},
{
index: 7,
type: "PRONOMINAL",
end: 99,
start: 96,
startToken: 21,
text: "her",
utterance: 0,
endToken: 22,
},
],
singleScores: {
"0": null,
"1": 1.609437735243254,
"2": -1.1017402175324822,
"3": 1.4347901008486401,
"4": -0.02895837171142801,
"5": -0.4266623545401909,
"6": 0.009921976322164627,
"7": -1.8629830475049451,
},
pairScores: {
"0": {},
"1": {
"0": -2.2413815226574703,
},
"2": {
"0": -2.409825572927252,
"1": 5.707592445811339,
},
"3": {
"0": -3.0653216162902854,
"1": -1.6904548462117184,
"2": -1.8322836987315447,
},
"4": {
"0": -3.688547511940379,
"1": -2.0587007889253717,
"2": -3.370481889890517,
"3": 2.67729831167075,
},
"5": {
"0": -2.6457134524861243,
"1": 8.41568336157475,
"2": 5.457479617210075,
"3": 0.5541345662624297,
"4": -2.952959651402653,
},
"6": {
"0": 6.483305186430136,
"1": -2.5309543937239427,
"2": -2.4954945953746566,
"3": -2.812183970273315,
"4": -2.998588381716906,
"5": -2.2723718581884205,
},
"7": {
"0": -2.9154581227140457,
"1": 9.352887851205328,
"2": 9.844018411095597,
"3": 1.8138255060465474,
"4": -3.3396902374034765,
"5": 10.035481487601054,
"6": -3.0660799723685312,
},
},
cleanedContext: "",
isResolved: true,
},
],
[
`My mother's name is Sasha, she likes dogs.`,
{
cleanedText: "My mother's name is Sasha, she likes dogs.",
corefResText: "My mother's name is Sasha, my mother likes dogs.",
coreferences: [
{
resolved: "My mother",
original: "she",
},
],
mentions: [
{
index: 0,
type: "PRONOMINAL",
end: 2,
start: 0,
startToken: 0,
text: "My",
utterance: 0,
endToken: 1,
},
{
index: 1,
type: "NOMINAL",
end: 9,
start: 0,
startToken: 0,
text: "My mother",
utterance: 0,
endToken: 2,
},
{
index: 2,
type: "NOMINAL",
end: 16,
start: 0,
startToken: 0,
text: "My mother's name",
utterance: 0,
endToken: 4,
},
{
index: 3,
type: "PROPER",
end: 25,
start: 20,
startToken: 5,
text: "Sasha",
utterance: 0,
endToken: 6,
},
{
index: 4,
type: "PRONOMINAL",
end: 30,
start: 27,
startToken: 7,
text: "she",
utterance: 0,
endToken: 8,
},
{
index: 5,
type: "NOMINAL",
end: 41,
start: 37,
startToken: 9,
text: "dogs",
utterance: 0,
endToken: 10,
},
],
singleScores: {
"0": null,
"1": 1.9246201814037063,
"2": 1.3833144431588633,
"3": 1.8293318485967687,
"4": 0.11171655922904344,
"5": 1.8179855402495786,
},
pairScores: {
"0": {},
"1": {
"0": -2.021441708531068,
},
"2": {
"0": -2.4538823419832134,
"1": -1.5272053058795838,
},
"3": {
"0": -2.2864554131219212,
"1": -1.5158990748985923,
"2": -1.5676019384720228,
},
"4": {
"0": -1.077294938181586,
"1": 5.190687831349847,
"2": 1.3862198517098907,
"3": 1.5871185522743856,
},
"5": {
"0": -2.957565366582327,
"1": -1.572206989880445,
"2": -1.5136893865248766,
"3": -2.295173505354227,
"4": -2.2454728131610056,
},
},
cleanedContext: "",
isResolved: true,
},
],
]);
/**
 * Drives the coreference demo: obtains a coreference result for a piece of
 * text (from COREF_HARDCODED_OUTPUTS when it is non-empty, otherwise from
 * `endpoint`) and renders the mentions plus one SVG arrow per scored pair.
 *
 * `container` and `svgContainer` are assigned by the caller after
 * construction; until both are set, `svgResize()` does nothing and
 * `render()` stops before touching the DOM.
 */
class Coref {
  /**
   * @param {string} endpoint URL queried as `${endpoint}?text=...`.
   * @param {{onStart?: Function, onSuccess?: Function}} [opts] Optional hooks:
   *   `onStart` runs at the beginning of every `parse()`, `onSuccess` right
   *   before a result is rendered.
   */
  constructor(endpoint, opts) {
    // Tolerate a missing/null options bag instead of throwing on `opts.onStart`.
    const { onStart, onSuccess } = opts ?? {};
    this.onStart = () => { };
    this.onSuccess = () => { };
    this.endpoint = endpoint;
    if (onStart) {
      this.onStart = onStart;
    }
    if (onSuccess) {
      this.onSuccess = onSuccess;
    }
    // Wrap in an arrow so `this` stays the Coref instance. Passing the bare
    // method made `this` the window inside the handler.
    window.addEventListener('resize', () => this.svgResize());
  }

  /** Sizes the SVG overlay to the scrollable size of the text container. */
  svgResize() {
    if (!this.container || !this.svgContainer) {
      return;
    }
    this.svgContainer.setAttribute('width', `${this.container.scrollWidth}`);
    this.svgContainer.setAttribute('height', `${this.container.scrollHeight}`);
  }

  /**
   * Resolves `text` and renders the result asynchronously.
   * @param {string} text Sentence(s) to analyse.
   */
  parse(text) {
    this.onStart();
    if (COREF_HARDCODED_OUTPUTS.size) {
      // Unknown text falls back to the first hardcoded entry.
      const output = COREF_HARDCODED_OUTPUTS.get(text)
        ?? COREF_HARDCODED_OUTPUTS.values().next().value;
      // Short delay before rendering, as in the original implementation.
      setTimeout(() => {
        this.onSuccess();
        this.render(output);
      }, 300);
      return;
    }
    const path = `${this.endpoint}?text=${encodeURIComponent(text)}`;
    const request = new XMLHttpRequest();
    request.open('GET', path);
    request.onload = () => {
      if (request.status >= 200 && request.status < 400) {
        this.onSuccess();
        const res = JSON.parse(request.responseText);
        this.render(res);
      }
      else {
        console.error('Error', request);
      }
    };
    // Network-level failures never reach `onload`; report them as well.
    request.onerror = () => {
      console.error('Error', request);
    };
    request.send();
  }

  /**
   * Renders a coreference result into `container` / `svgContainer`.
   * Note: annotates each entry of `res.mentions` with `singleScore`, and
   * `Displacy.render` sorts that array in place.
   * @param {object} res Result with `cleanedText`, `mentions`,
   *   `singleScores` and `pairScores`.
   */
  render(res) {
    const mentions = res.mentions;
    for (const m of mentions) {
      // `??` keeps a legitimate score of 0; only null/undefined are dropped.
      m.singleScore = res.singleScores[m.index] ?? undefined;
    }
    const markup = Displacy.render(res.cleanedText, mentions);
    if (!this.container || !this.svgContainer) {
      return;
    }
    this.container.innerHTML = `<div class="text">${markup}</div>`;
    this.svgContainer.textContent = "";
    this.svgResize();
    // Still exposed on window in case other scripts read these globals; the
    // resize handler itself no longer depends on them.
    window.container = this.container;
    window.svgContainer = this.svgContainer;
    const endY = document.querySelector('.container .text').getBoundingClientRect().top
      - this.container.getBoundingClientRect().top
      - 2;
    SvgArrow.yArrows = endY;
    for (const [fromKey, scores] of Object.entries(res.pairScores)) {
      const from = parseInt(fromKey, 10);
      // Loop-invariant per source mention: compute once, not once per pair.
      const bestScore = Math.max(...Object.values(scores));
      const singleScore = res.singleScores[from];
      const markFrom = document.querySelector(`mark[data-index="${from}"]`);
      for (const [toKey, score] of Object.entries(scores)) {
        const to = parseInt(toKey, 10);
        const markTo = document.querySelector(`mark[data-index="${to}"]`);
        const arrow = new SvgArrow(this.container, markFrom, markTo, score);
        if (score >= bestScore) {
          arrow.classNames.push('score-ok');
          // Explicit null check so a single score of exactly 0 still compares.
          if (singleScore != null && score >= singleScore) {
            arrow.classNames.push('score-best');
          }
        }
        this.svgContainer.appendChild(arrow.generate());
      }
    }
    // Re-append the best arrows last so they are drawn on top of the others.
    document.querySelectorAll('.displacy-arrow.score-ok').forEach((arw) => {
      this.svgContainer.appendChild(arw);
    });
  }
}
/**
 * Turns a text plus a list of character spans into HTML where every span is
 * wrapped in a `<mark>` element (nested spans produce nested marks).
 */
class Displacy {
  /**
   * Escapes a value for safe inclusion in HTML text or a quoted attribute.
   * @param {*} value Converted with `String()` before escaping.
   * @returns {string}
   */
  static escapeHtml(value) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
  }

  /**
   * Sorts `spans` IN PLACE by start offset; for equal starts the longer span
   * comes first so that it becomes the outer `<mark>`. Logs when two adjacent
   * spans partially overlap (neither contains the other), because such spans
   * cannot be expressed as nested elements.
   * @param {Array<{start: number, end: number}>} spans
   */
  static sortSpans(spans) {
    spans.sort((a, b) => (a.start === b.start ? b.end - a.end : a.start - b.start));
    for (let i = 0; i < spans.length - 1; i++) {
      const s = spans[i];
      const sNext = spans[i + 1];
      // Crossing = the next span starts inside `s` but ends after it. A span
      // fully contained in `s` is valid nesting and must not be reported.
      if (s.start < sNext.start && s.end > sNext.start && s.end < sNext.end) {
        console.log("ERROR", "Spans: strict overlapping");
      }
    }
  }

  /**
   * Builds the HTML markup for `text` annotated with `spans`.
   * @param {string} text Plain text; it is HTML-escaped in the output.
   * @param {Array<{start: number, end: number, type: string, index: number,
   *   singleScore?: number}>} spans Sorted in place as a side effect.
   * @returns {string} HTML string.
   */
  static render(text, spans) {
    Displacy.sortSpans(spans);
    // offset -> ordered list of tags to emit at that offset.
    const tags = {};
    const addTag = (i, span, tag) => {
      if (Array.isArray(tags[i])) {
        tags[i].push({ span, tag });
      }
      else {
        tags[i] = [{ span, tag }];
      }
    };
    for (const s of spans) {
      addTag(s.start, s, "start");
      addTag(s.end, s, "end");
    }
    const parts = [];
    let offset = 0;
    const indexes = Object.keys(tags).map(k => parseInt(k, 10)).sort((a, b) => a - b);
    for (const i of indexes) {
      if (i > offset) {
        // The result is assigned to innerHTML by the caller, so raw text must
        // be escaped or characters like `<` and `&` would be parsed as markup.
        parts.push(Displacy.escapeHtml(text.slice(offset, i)));
      }
      offset = i;
      for (const { span, tag } of tags[i]) {
        if (tag === "start") {
          const entity = Displacy.escapeHtml(span.type.toLowerCase());
          const index = Displacy.escapeHtml(span.index);
          parts.push(`<mark data-entity="${entity}" data-index="${index}">`);
          // Show any finite score, including exactly 0.
          if (Number.isFinite(span.singleScore)) {
            parts.push(`<span class="single-score">${span.singleScore.toFixed(3)}</span>`);
          }
        }
        else {
          parts.push(`</mark>`);
        }
      }
    }
    parts.push(Displacy.escapeHtml(text.slice(offset, text.length)));
    return parts.join("");
  }
}
/**
 * One curved SVG arrow, labelled with a score, drawn between two `<mark>`
 * elements. Horizontal positions are measured relative to `container`; the
 * vertical baseline shared by all arrows is the static `SvgArrow.yArrows`.
 */
class SvgArrow {
  /**
   * @param {Element} container Element the x coordinates are relative to.
   * @param {Element} markFrom Mark at which the arc ends.
   * @param {Element} markTo Mark at which the arc starts.
   * @param {number} score Value printed along the arc with two decimals.
   */
  constructor(container, markFrom, markTo, score) {
    this.classNames = [];
    this.container = container;
    this.markFrom = markFrom;
    this.markTo = markTo;
    this.score = score;
  }

  /**
   * Creates an SVG-namespaced element.
   * @param {string} tag SVG tag name.
   * @param {object} options Optional `classnames`, `attributes` ([name, value]
   *   pairs), `style` ([prop, value] pairs), `children`, `text`, `id`, `xlink`.
   * @returns {Element}
   */
  _el(tag, options) {
    const SVG_NS = 'http://www.w3.org/2000/svg';
    const XLINK_NS = 'http://www.w3.org/1999/xlink';
    const {
      classnames = [],
      attributes = [],
      style = [],
      children = [],
      text,
      id,
      xlink,
    } = options;
    const node = document.createElementNS(SVG_NS, tag);
    for (const cls of classnames) {
      node.classList.add(cls);
    }
    for (const [attrName, attrValue] of attributes) {
      node.setAttribute(attrName, attrValue);
    }
    for (const [cssProp, cssValue] of style) {
      node.style[cssProp] = cssValue;
    }
    if (xlink) {
      node.setAttributeNS(XLINK_NS, 'xlink:href', xlink);
    }
    if (text) {
      node.appendChild(document.createTextNode(text));
    }
    if (id) {
      node.id = id;
    }
    for (const child of children) {
      node.appendChild(child);
    }
    return node;
  }

  /**
   * Builds the `<g>` element holding the arc path and its score label.
   * @returns {Element}
   */
  generate() {
    // Random suffix that ties the label's textPath to this arrow's path.
    const uid = Math.random().toString(36).slice(2, 10);
    const pathId = `arrow-${uid}`;
    const baseline = SvgArrow.yArrows;

    // Horizontal centre of a mark, relative to the container's left edge.
    const centerX = (mark) => mark.getBoundingClientRect().left
      - this.container.getBoundingClientRect().left
      + mark.getBoundingClientRect().width / 2;
    const startX = centerX(this.markTo);
    const endX = centerX(this.markFrom);

    // Wider arcs rise higher, but the control points never go above y = -50.
    const curveY = Math.max(-50, baseline - (endX - startX) / 3.2);

    const arc = this._el('path', {
      id: pathId,
      classnames: ['displacy-arc'],
      attributes: [
        ['d', `M${startX},${baseline} C${startX},${curveY} ${endX},${curveY} ${endX},${baseline}`],
        ['stroke-width', '2px'],
        ['fill', 'none'],
        ['stroke', 'currentColor'],
      ],
    });
    const scoreLabel = this._el('textPath', {
      xlink: `#${pathId}`,
      classnames: ['displacy-label'],
      attributes: [
        ['startOffset', '50%'],
        ['fill', 'currentColor'],
        ['text-anchor', 'middle'],
      ],
      text: this.score.toFixed(2),
    });
    const label = this._el('text', {
      attributes: [['dy', '1em']],
      children: [scoreLabel],
    });
    return this._el('g', {
      classnames: ['displacy-arrow', ...this.classNames],
      children: [arc, label],
    });
  }
}
// Shared baseline (y coordinate) for all arrows; updated by the renderer.
SvgArrow.yArrows = 0;
// Coreference API endpoint handed to `Coref`. The previous literal was an
// unterminated, HTML-proxy-mangled string; this is the URL it percent-decodes to.
const ENDPOINT = "https://coref.huggingface.co/coref";
/**
 * Returns one of the three demo sentences, chosen at random on every call.
 * @returns {string}
 */
const DEFAULT_NLP_TEXT = () => {
  const samples = [
    `I love my father and my mother. They work hard. She is always nice but he is sometimes rude.`,
    `My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.`,
    `My mother's name is Sasha, she likes dogs.`,
  ];
  const pick = Math.floor(Math.random() * samples.length);
  return samples[pick];
};
/** Flips the `loading` class on `<body>`; used as both start and success hook. */
const loading = () => {
  const { classList } = document.body;
  classList.toggle('loading');
};
/**
 * Flips debug mode: toggles the `debug` class on `<body>`, toggles `hide` on
 * every `.svg-checkbox` icon, and stores the resulting state ("true"/"false")
 * under the `debug` key in localStorage.
 */
const toggleDebug = () => {
  const bodyClasses = document.body.classList;
  bodyClasses.toggle('debug');
  for (const icon of document.querySelectorAll('.svg-checkbox')) {
    icon.classList.toggle('hide');
  }
  const debugOn = bodyClasses.contains('debug');
  window.localStorage.setItem('debug', debugOn.toString());
};
// Single page-wide Coref instance. `loading` toggles a body class, so using
// it for both hooks switches the loading state on at start and off on success.
const coref = new Coref(ENDPOINT, { onStart: loading, onSuccess: loading });
/**
 * Reads one parameter from the current page's query string.
 *
 * Uses URLSearchParams so that values containing `=` are kept whole, `+` is
 * decoded as a space (as produced by native form submission), malformed
 * percent-escapes do not throw, and a key without a value yields "" rather
 * than the string "undefined".
 *
 * @param {string} key Parameter name.
 * @returns {string|undefined} The first decoded value for `key`, or
 *   `undefined` when the parameter is absent.
 */
const getQueryVar = (key) => {
  const value = new URLSearchParams(window.location.search).get(key);
  // `get` returns null for a missing key; callers expect undefined.
  return value ?? undefined;
};
/**
 * Pushes a history entry whose URL carries `text` as the `text` query
 * parameter, without reloading the page.
 * @param {string} text Text to record in the URL and in the history state.
 */
const updateURL = (text) => {
  const state = { text };
  const url = `?text=${encodeURIComponent(text)}`;
  history.pushState(state, "", url);
};
// Page bootstrap: once the DOM is ready, attach the Coref instance to its
// containers, render the initial text, and wire up the form and debug toggle.
document.addEventListener('DOMContentLoaded', () => {
  const $input = document.querySelector('input.input-message');
  const $form = document.querySelector('form.js-form');
  const $checkbox = document.querySelector('.js-checkbox');
  const $svgContainer = document.querySelector('.svg-container');
  coref.container = document.querySelector('.container');
  coref.svgContainer = $svgContainer;

  // Initial render: text from the URL if present, otherwise a random sample.
  {
    const queryText = getQueryVar('text');
    if (queryText) {
      $input.value = queryText;
      coref.parse(queryText);
    }
    else {
      coref.parse(DEFAULT_NLP_TEXT());
    }
  }

  $input.addEventListener('keydown', (evt) => {
    // `charCode` is always 0 on keydown events, so the old `charCode === 13`
    // test never matched; `key` is the supported way to detect Enter.
    // Enter pressed while an IME composition is active confirms the
    // composition and must not submit.
    const isEnter = evt.key === 'Enter' && !evt.isComposing;
    // requestSubmit() dispatches the 'submit' event handled below, whereas
    // submit() would bypass that handler and navigate. Where requestSubmit is
    // unavailable, leave the key alone so native implicit submission happens.
    if (isEnter && typeof $form.requestSubmit === 'function') {
      evt.preventDefault();
      $form.requestSubmit();
    }
  });

  $form.addEventListener('submit', (evt) => {
    // Handle the submission in-page instead of letting the browser navigate.
    evt.preventDefault();
    const text = ($input.value.length > 0)
      ? $input.value
      : DEFAULT_NLP_TEXT();
    updateURL(text);
    coref.parse(text);
  });

  $checkbox.addEventListener('click', () => {
    toggleDebug();
  });

  // Debug mode is on unless it was explicitly stored as 'false'.
  if (window.localStorage.getItem('debug') !== 'false') {
    toggleDebug();
  }
});