// Spaces: Running -- hosting-page banner text captured together with the file; not part of the script.
/**
 * Canned coreference results, keyed by the exact input sentence.
 *
 * `Coref.parse` serves these instead of calling the endpoint whenever this map
 * is non-empty. Each value has the fields `Coref.render` reads:
 *   - cleanedText:  text the mention offsets index into
 *   - mentions:     spans (`start`/`end` are character offsets into cleanedText)
 *   - singleScores: one score per mention index (`null` for mention 0)
 *   - pairScores:   pairScores[from][to] is the score drawn on the arrow
 *                   between the marks of mention `to` and mention `from`
 * `corefResText`, `coreferences`, `cleanedContext` and `isResolved` are kept
 * as captured but are not read by the code in this file.
 */
const COREF_HARDCODED_OUTPUTS = new Map([
  [
    `I love my father and my mother. They work hard. She is always nice but he is sometimes rude.`,
    {
      cleanedText: "I love my father and my mother. They work hard. She is always nice but he is sometimes rude.",
      corefResText: "I love my father and my mother. They work hard. my mother is always nice but he is sometimes rude.",
      coreferences: [
        { resolved: "my mother", original: "She" },
      ],
      mentions: [
        { index: 0, type: "PRONOMINAL", end: 1, start: 0, startToken: 0, text: "I", utterance: 0, endToken: 1 },
        { index: 1, type: "PRONOMINAL", end: 9, start: 7, startToken: 2, text: "my", utterance: 0, endToken: 3 },
        { index: 2, type: "LIST", end: 30, start: 7, startToken: 2, text: "my father and my mother", utterance: 0, endToken: 7 },
        { index: 3, type: "NOMINAL", end: 16, start: 7, startToken: 2, text: "my father", utterance: 0, endToken: 4 },
        { index: 4, type: "PRONOMINAL", end: 23, start: 21, startToken: 5, text: "my", utterance: 0, endToken: 6 },
        { index: 5, type: "NOMINAL", end: 30, start: 21, startToken: 5, text: "my mother", utterance: 0, endToken: 7 },
        { index: 6, type: "PRONOMINAL", end: 36, start: 32, startToken: 8, text: "They", utterance: 0, endToken: 9 },
        { index: 7, type: "PRONOMINAL", end: 51, start: 48, startToken: 12, text: "She", utterance: 0, endToken: 13 },
        { index: 8, type: "PRONOMINAL", end: 73, start: 71, startToken: 17, text: "he", utterance: 0, endToken: 18 },
      ],
      singleScores: {
        "0": null,
        "1": -0.5316791573216993,
        "2": 1.5312658152183065,
        "3": 1.555544240226482,
        "4": -1.3024868053674725,
        "5": 1.8934082334098847,
        "6": -0.9070692483062055,
        "7": 0.052800267274213275,
        "8": -0.14679673122527748,
      },
      pairScores: {
        "0": {},
        "1": {
          "0": 15.268471128411697,
        },
        "2": {
          "0": -4.4230603182609896,
          "1": -3.382446088190441,
        },
        "3": {
          "0": -4.2304546215104555,
          "1": -3.5681677896088786,
          "2": -1.9470202037608262,
        },
        "4": {
          "0": 11.654522570777317,
          "1": 13.455601870537567,
          "2": -3.218918301345336,
          "3": -3.882381584104524,
        },
        "5": {
          "0": -3.9280501278811983,
          "1": -4.426880262361277,
          "2": -1.7714693884323367,
          "3": -2.722532370602323,
          "4": -3.290164176455163,
        },
        "6": {
          "0": -4.492101447800931,
          "1": -4.636904674331316,
          "2": 3.1158072056943666,
          "3": -2.7375757747875573,
          "4": -4.689981185699828,
          "5": -2.6728186848475537,
        },
        "7": {
          "0": -3.197215354228037,
          "1": -3.538538702704479,
          "2": -0.02408947507481729,
          "3": 0.3052410603657605,
          "4": -3.519641485034609,
          "5": 1.8101046215415115,
          "6": -3.1353342036917917,
        },
        "8": {
          "0": -0.08532621450323319,
          "1": 5.371002989344198,
          "2": -1.4091179987286686,
          "3": 3.152921411948177,
          "4": 2.268706305216419,
          "5": -2.340743897439996,
          "6": -2.8835496283480597,
          "7": -3.4832126005315334,
        },
      },
      cleanedContext: "",
      isResolved: true,
    },
  ],
  [
    `My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.`,
    {
      cleanedText: "My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.",
      corefResText: "My sister is swimming with my sister classmates. her classmates are not bad, but my sister is better. I love watching my sister swim.",
      coreferences: [
        { resolved: "My sister", original: "she" },
        { resolved: "My sister", original: "her" },
        { resolved: "her classmates", original: "They" },
        { resolved: "My sister", original: "her" },
      ],
      mentions: [
        { index: 0, type: "PRONOMINAL", end: 2, start: 0, startToken: 0, text: "My", utterance: 0, endToken: 1 },
        { index: 1, type: "NOMINAL", end: 9, start: 0, startToken: 0, text: "My sister", utterance: 0, endToken: 2 },
        { index: 2, type: "PRONOMINAL", end: 30, start: 27, startToken: 5, text: "her", utterance: 0, endToken: 6 },
        { index: 3, type: "NOMINAL", end: 41, start: 27, startToken: 5, text: "her classmates", utterance: 0, endToken: 7 },
        { index: 4, type: "PRONOMINAL", end: 47, start: 43, startToken: 8, text: "They", utterance: 0, endToken: 9 },
        { index: 5, type: "PRONOMINAL", end: 68, start: 65, startToken: 14, text: "she", utterance: 0, endToken: 15 },
        { index: 6, type: "PRONOMINAL", end: 81, start: 80, startToken: 18, text: "I", utterance: 0, endToken: 19 },
        { index: 7, type: "PRONOMINAL", end: 99, start: 96, startToken: 21, text: "her", utterance: 0, endToken: 22 },
      ],
      singleScores: {
        "0": null,
        "1": 1.609437735243254,
        "2": -1.1017402175324822,
        "3": 1.4347901008486401,
        "4": -0.02895837171142801,
        "5": -0.4266623545401909,
        "6": 0.009921976322164627,
        "7": -1.8629830475049451,
      },
      pairScores: {
        "0": {},
        "1": {
          "0": -2.2413815226574703,
        },
        "2": {
          "0": -2.409825572927252,
          "1": 5.707592445811339,
        },
        "3": {
          "0": -3.0653216162902854,
          "1": -1.6904548462117184,
          "2": -1.8322836987315447,
        },
        "4": {
          "0": -3.688547511940379,
          "1": -2.0587007889253717,
          "2": -3.370481889890517,
          "3": 2.67729831167075,
        },
        "5": {
          "0": -2.6457134524861243,
          "1": 8.41568336157475,
          "2": 5.457479617210075,
          "3": 0.5541345662624297,
          "4": -2.952959651402653,
        },
        "6": {
          "0": 6.483305186430136,
          "1": -2.5309543937239427,
          "2": -2.4954945953746566,
          "3": -2.812183970273315,
          "4": -2.998588381716906,
          "5": -2.2723718581884205,
        },
        "7": {
          "0": -2.9154581227140457,
          "1": 9.352887851205328,
          "2": 9.844018411095597,
          "3": 1.8138255060465474,
          "4": -3.3396902374034765,
          "5": 10.035481487601054,
          "6": -3.0660799723685312,
        },
      },
      cleanedContext: "",
      isResolved: true,
    },
  ],
  [
    `My mother's name is Sasha, she likes dogs.`,
    {
      cleanedText: "My mother's name is Sasha, she likes dogs.",
      corefResText: "My mother's name is Sasha, my mother likes dogs.",
      coreferences: [
        { resolved: "My mother", original: "she" },
      ],
      mentions: [
        { index: 0, type: "PRONOMINAL", end: 2, start: 0, startToken: 0, text: "My", utterance: 0, endToken: 1 },
        { index: 1, type: "NOMINAL", end: 9, start: 0, startToken: 0, text: "My mother", utterance: 0, endToken: 2 },
        { index: 2, type: "NOMINAL", end: 16, start: 0, startToken: 0, text: "My mother's name", utterance: 0, endToken: 4 },
        { index: 3, type: "PROPER", end: 25, start: 20, startToken: 5, text: "Sasha", utterance: 0, endToken: 6 },
        { index: 4, type: "PRONOMINAL", end: 30, start: 27, startToken: 7, text: "she", utterance: 0, endToken: 8 },
        { index: 5, type: "NOMINAL", end: 41, start: 37, startToken: 9, text: "dogs", utterance: 0, endToken: 10 },
      ],
      singleScores: {
        "0": null,
        "1": 1.9246201814037063,
        "2": 1.3833144431588633,
        "3": 1.8293318485967687,
        "4": 0.11171655922904344,
        "5": 1.8179855402495786,
      },
      pairScores: {
        "0": {},
        "1": {
          "0": -2.021441708531068,
        },
        "2": {
          "0": -2.4538823419832134,
          "1": -1.5272053058795838,
        },
        "3": {
          "0": -2.2864554131219212,
          "1": -1.5158990748985923,
          "2": -1.5676019384720228,
        },
        "4": {
          "0": -1.077294938181586,
          "1": 5.190687831349847,
          "2": 1.3862198517098907,
          "3": 1.5871185522743856,
        },
        "5": {
          "0": -2.957565366582327,
          "1": -1.572206989880445,
          "2": -1.5136893865248766,
          "3": -2.295173505354227,
          "4": -2.2454728131610056,
        },
      },
      cleanedContext: "",
      isResolved: true,
    },
  ],
]);
/**
 * Obtains a coreference analysis for a text and draws it.
 *
 * The annotated text is written into `container` and one SVG arrow per scored
 * mention pair is appended to `svgContainer`. Both properties are expected to
 * be assigned by the caller after construction; until then `render` and
 * `svgResize` are no-ops.
 */
class Coref {
  /**
   * @param {string} endpoint - URL queried as `${endpoint}?text=...` when no canned output is available.
   * @param {{onStart?: Function, onSuccess?: Function}} [opts] - Optional callbacks:
   *   `onStart` runs at the beginning of every `parse`, `onSuccess` right before rendering.
   */
  constructor(endpoint, opts) {
    const { onStart, onSuccess } = opts ?? {};
    this.endpoint = endpoint;
    this.onStart = typeof onStart === 'function' ? onStart : () => { };
    this.onSuccess = typeof onSuccess === 'function' ? onSuccess : () => { };
    // Wrap in an arrow function: passing `this.svgResize` directly would run it
    // with `this === window` when the event fires.
    window.addEventListener('resize', () => this.svgResize());
  }

  /** Sizes the SVG overlay to the scrollable size of the text container. */
  svgResize() {
    if (!this.container || !this.svgContainer) {
      return;
    }
    this.svgContainer.setAttribute('width', `${this.container.scrollWidth}`);
    this.svgContainer.setAttribute('height', `${this.container.scrollHeight}`);
  }

  /**
   * Analyses `text` and renders the result asynchronously.
   *
   * While `COREF_HARDCODED_OUTPUTS` is non-empty no request is sent: the canned
   * output for `text` is used, or the first canned output when `text` is unknown.
   *
   * @param {string} text - Sentence(s) to analyse.
   */
  parse(text) {
    this.onStart();
    if (COREF_HARDCODED_OUTPUTS.size) {
      const output = COREF_HARDCODED_OUTPUTS.get(text)
        ?? COREF_HARDCODED_OUTPUTS.values().next().value;
      setTimeout(() => {
        this.onSuccess();
        this.render(output);
      }, 300);
      return;
    }
    const request = new XMLHttpRequest();
    request.open('GET', `${this.endpoint}?text=${encodeURIComponent(text)}`);
    request.onload = () => {
      if (request.status < 200 || request.status >= 400) {
        console.error('Error', request);
        return;
      }
      this.onSuccess();
      let res;
      try {
        res = JSON.parse(request.responseText);
      } catch (err) {
        // A non-JSON body must not surface as an uncaught exception.
        console.error('Error', err);
        return;
      }
      this.render(res);
    };
    // Network-level failures never reach `onload`; report them as well.
    request.onerror = () => {
      console.error('Error', request);
    };
    request.send();
  }

  /**
   * Draws an analysis result: marked-up text plus one arrow per pair score.
   *
   * @param {{cleanedText: string, mentions: object[], singleScores: object, pairScores: object}} res
   *   Result in the shape of the `COREF_HARDCODED_OUTPUTS` values.
   */
  render(res) {
    if (!this.container || !this.svgContainer) {
      return;
    }
    // Work on copies so a shared (e.g. hard-coded) response is neither extended
    // with `singleScore` nor re-sorted in place. `??` keeps a score of exactly 0.
    const mentions = res.mentions.map((m) => ({
      ...m,
      singleScore: res.singleScores[m.index] ?? undefined,
    }));
    const markup = Displacy.render(res.cleanedText, mentions);
    this.container.innerHTML = `<div class="text">${markup}</div>`;
    this.svgContainer.textContent = "";
    this.svgResize();
    // Retained from the previous implementation, which exposed both elements as
    // globals (its unbound resize listener read them from `window`).
    window.container = this.container;
    window.svgContainer = this.svgContainer;

    // Arrows start and end slightly above the top edge of the text block.
    const textTop = this.container.querySelector('.text').getBoundingClientRect().top;
    SvgArrow.yArrows = textTop - this.container.getBoundingClientRect().top - 2;

    for (const [fromKey, scores] of Object.entries(res.pairScores)) {
      const from = Number.parseInt(fromKey, 10);
      const markFrom = this.container.querySelector(`mark[data-index="${from}"]`);
      const bestScore = Math.max(...Object.values(scores));
      const singleScore = res.singleScores[from];
      for (const [toKey, score] of Object.entries(scores)) {
        const to = Number.parseInt(toKey, 10);
        const markTo = this.container.querySelector(`mark[data-index="${to}"]`);
        if (!markFrom || !markTo) {
          // No rendered mark for one end of the pair: nothing to connect.
          continue;
        }
        const arrow = new SvgArrow(this.container, markFrom, markTo, score);
        if (score >= bestScore) {
          arrow.classNames.push('score-ok');
          if (singleScore != null && score >= singleScore) {
            arrow.classNames.push('score-best');
          }
        }
        this.svgContainer.appendChild(arrow.generate());
      }
    }
    // Re-append the best arrows so they are painted last, i.e. on top.
    this.svgContainer.querySelectorAll('.displacy-arrow.score-ok').forEach((arw) => {
      this.svgContainer.appendChild(arw);
    });
  }
}
/**
 * Turns a text plus a list of character spans into HTML in which every span
 * is wrapped in a `<mark>` element.
 */
class Displacy {
  /**
   * Escapes a value for safe use in HTML text and double-quoted attributes.
   *
   * @param {*} value - Converted with `String()` before escaping.
   * @returns {string}
   */
  static escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Sorts `spans` IN PLACE by start offset; spans sharing a start are ordered
   * longest first so that enclosing spans open before the spans they contain.
   * Logs a message when two neighbouring spans partially overlap.
   *
   * @param {{start: number, end: number}[]} spans
   */
  static sortSpans(spans) {
    spans.sort((a, b) => (a.start === b.start ? b.end - a.end : a.start - b.start));
    for (let i = 0; i < spans.length - 1; i += 1) {
      const current = spans[i];
      const next = spans[i + 1];
      if (current.start < next.start && current.end > next.start) {
        console.log("ERROR", "Spans: strict overlapping");
      }
    }
  }

  /**
   * Renders `text` with each span wrapped in
   * `<mark data-entity="<type lower-cased>" data-index="<index>">`. A span with
   * a finite numeric `singleScore` additionally gets a leading
   * `<span class="single-score">` showing the score with three decimals.
   *
   * Text segments and attribute values are HTML-escaped, so the result can be
   * assigned to `innerHTML` even when `text` contains markup characters.
   *
   * @param {string} text - Raw text that the span offsets index into.
   * @param {{start: number, end: number, type: string, index: number, singleScore?: number}[]} spans
   *   Sorted in place as a side effect (see `sortSpans`).
   * @returns {string} HTML markup.
   */
  static render(text, spans) {
    Displacy.sortSpans(spans);

    // Group opening/closing tags by the character offset at which they occur.
    const tagsByOffset = new Map();
    const addTag = (offset, span, tag) => {
      const bucket = tagsByOffset.get(offset);
      if (bucket) {
        bucket.push({ span, tag });
      } else {
        tagsByOffset.set(offset, [{ span, tag }]);
      }
    };
    for (const span of spans) {
      addTag(span.start, span, "start");
      addTag(span.end, span, "end");
    }

    const parts = [];
    let cursor = 0;
    const offsets = [...tagsByOffset.keys()].sort((a, b) => a - b);
    for (const offset of offsets) {
      if (offset > cursor) {
        parts.push(Displacy.escapeHtml(text.slice(cursor, offset)));
      }
      cursor = offset;
      for (const { span, tag } of tagsByOffset.get(offset)) {
        if (tag !== "start") {
          parts.push(`</mark>`);
          continue;
        }
        const entity = Displacy.escapeHtml(span.type.toLowerCase());
        const index = Displacy.escapeHtml(span.index);
        parts.push(`<mark data-entity="${entity}" data-index="${index}">`);
        // Test for a real number rather than truthiness so that 0 is displayed.
        if (Number.isFinite(span.singleScore)) {
          parts.push(`<span class="single-score">${span.singleScore.toFixed(3)}</span>`);
        }
      }
    }
    parts.push(Displacy.escapeHtml(text.slice(cursor)));
    return parts.join('');
  }
}
/**
 * One curved SVG arrow, labelled with a score, drawn between the horizontal
 * centres of two `<mark>` elements. Coordinates are relative to `container`.
 * All arrows start and end at the shared height `SvgArrow.yArrows`.
 */
class SvgArrow {
  /**
   * @param {Element} container - Element whose left edge is the x origin.
   * @param {Element} markFrom - Mark at which the arc ends.
   * @param {Element} markTo - Mark at which the arc starts.
   * @param {number} score - Value printed along the arc (two decimals).
   */
  constructor(container, markFrom, markTo, score) {
    this.classNames = [];
    this.container = container;
    this.markFrom = markFrom;
    this.markTo = markTo;
    this.score = score;
  }

  /**
   * Creates an SVG element.
   *
   * @param {string} tag - SVG tag name.
   * @param {object} [options]
   * @param {string[]} [options.classnames] - Classes to add.
   * @param {Array<[string, string]>} [options.attributes] - Attribute name/value pairs.
   * @param {Array<[string, string]>} [options.style] - Inline style property/value pairs.
   * @param {Node[]} [options.children] - Child nodes, appended after `text`.
   * @param {string} [options.text] - Text content.
   * @param {string} [options.id] - Element id.
   * @param {string} [options.xlink] - Value for the `xlink:href` attribute.
   * @returns {SVGElement}
   */
  _el(tag, options = {}) {
    const { classnames = [], attributes = [], style = [], children = [], text, id, xlink } = options;
    const el = document.createElementNS('http://www.w3.org/2000/svg', tag);
    for (const name of classnames) {
      el.classList.add(name);
    }
    for (const [attr, value] of attributes) {
      el.setAttribute(attr, value);
    }
    for (const [prop, value] of style) {
      el.style[prop] = value;
    }
    if (xlink) {
      el.setAttributeNS('http://www.w3.org/1999/xlink', 'xlink:href', xlink);
    }
    if (text) {
      el.appendChild(document.createTextNode(text));
    }
    if (id) {
      el.id = id;
    }
    for (const child of children) {
      el.appendChild(child);
    }
    return el;
  }

  /**
   * Builds the arrow as a `<g class="displacy-arrow ...classNames">` holding
   * the arc `<path>` and a `<text>` whose `<textPath>` follows the arc.
   *
   * @returns {SVGElement}
   */
  generate() {
    // Random id linking the label's textPath to its arc
    // (`slice(2, 10)` replaces the deprecated `substr(2, 8)`).
    const arcId = `arrow-${Math.random().toString(36).slice(2, 10)}`;
    const containerLeft = this.container.getBoundingClientRect().left;
    const centerX = (mark) => {
      const { left, width } = mark.getBoundingClientRect();
      return left - containerLeft + width / 2;
    };
    const startX = centerX(this.markTo);
    const endX = centerX(this.markFrom);
    const baseY = SvgArrow.yArrows;
    // Wider arcs bulge higher, but never above y = -50.
    const curveY = Math.max(-50, baseY - (endX - startX) / 3.2);

    const arc = this._el('path', {
      id: arcId,
      classnames: ['displacy-arc'],
      attributes: [
        ['d', `M${startX},${baseY} C${startX},${curveY} ${endX},${curveY} ${endX},${baseY}`],
        ['stroke-width', '2px'],
        ['fill', 'none'],
        ['stroke', 'currentColor'],
      ],
    });
    const label = this._el('text', {
      attributes: [['dy', '1em']],
      children: [
        this._el('textPath', {
          xlink: `#${arcId}`,
          classnames: ['displacy-label'],
          attributes: [
            ['startOffset', '50%'],
            ['fill', 'currentColor'],
            ['text-anchor', 'middle'],
          ],
          text: this.score.toFixed(2),
        }),
      ],
    });
    return this._el('g', {
      classnames: ['displacy-arrow', ...this.classNames],
      children: [arc, label],
    });
  }
}
// Shared y coordinate at which every arc starts and ends; set before arrows are generated.
SvgArrow.yArrows = 0;
// Coreference service URL; `Coref.parse` appends `?text=...` to it.
// NOTE(review): reconstructed from a line mangled by an HTML/proxy rewrite -- confirm the URL.
const ENDPOINT = "https://coref.huggingface.co/coref";
/**
 * Picks one of the built-in sample sentences uniformly at random.
 *
 * @returns {string} A sample sentence.
 */
const DEFAULT_NLP_TEXT = () => {
  const samples = [
    `I love my father and my mother. They work hard. She is always nice but he is sometimes rude.`,
    `My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.`,
    `My mother's name is Sasha, she likes dogs.`,
  ];
  const pick = Math.floor(Math.random() * samples.length);
  return samples[pick];
};
/**
 * Flips the `loading` class on `<body>`. Because it toggles, it is meant to be
 * called in pairs: once when work starts and once when it finishes.
 */
const loading = () => {
  const { classList } = document.body;
  classList.toggle('loading');
};
/**
 * Flips debug mode: toggles the `debug` class on `<body>`, toggles `hide` on
 * every `.svg-checkbox` element, and stores the resulting state under the
 * `debug` key of localStorage as the string "true" or "false".
 */
const toggleDebug = () => {
  const bodyClasses = document.body.classList;
  bodyClasses.toggle('debug');
  for (const icon of document.querySelectorAll('.svg-checkbox')) {
    icon.classList.toggle('hide');
  }
  window.localStorage.setItem('debug', String(bodyClasses.contains('debug')));
};
// Page-wide Coref instance. `loading` toggles the body's `loading` class, so
// using it for both callbacks switches the indicator on at start and off on success.
const coref = new Coref(ENDPOINT, {
  onStart: loading,
  onSuccess: loading,
});
/**
 * Reads one parameter from the current page's query string.
 *
 * Uses `URLSearchParams`, so values containing `=` are returned whole, `+` is
 * decoded as a space, a key without a value yields `""`, and malformed percent
 * escapes do not throw.
 *
 * @param {string} key - Parameter name.
 * @returns {string|undefined} Decoded value of the first match, or `undefined` when absent.
 */
const getQueryVar = (key) => {
  const params = new URLSearchParams(window.location.search);
  return params.get(key) ?? undefined;
};
/**
 * Pushes a history entry whose URL carries `text` as the `text` query
 * parameter, so the current analysis can be bookmarked or shared.
 *
 * @param {string} text - Text being analysed.
 */
const updateURL = (text) => {
  const query = `?text=${encodeURIComponent(text)}`;
  history.pushState({ text }, "", query);
};
// Page wiring, run once the DOM is parsed: connect the Coref instance to its
// containers, render an initial text, and hook up the form and debug checkbox.
document.addEventListener('DOMContentLoaded', () => {
  const $input = document.querySelector('input.input-message');
  const $form = document.querySelector('form.js-form');
  const $checkbox = document.querySelector('.js-checkbox');
  coref.container = document.querySelector('.container');
  coref.svgContainer = document.querySelector('.svg-container');

  // Initial render: a `text` query parameter wins, otherwise a random sample.
  const queryText = getQueryVar('text');
  if (queryText) {
    $input.value = queryText;
    coref.parse(queryText);
  } else {
    coref.parse(DEFAULT_NLP_TEXT());
  }

  $input.addEventListener('keydown', (evt) => {
    // `charCode` is always 0 on keydown events, so Enter is detected via `key`.
    // Enter pressed while an IME composition is active only confirms the composition.
    if (evt.key !== 'Enter' || evt.isComposing) {
      return;
    }
    evt.preventDefault();
    // `form.submit()` would skip the `submit` listener below and navigate away;
    // `requestSubmit()` fires the event like a real user submission.
    if (typeof $form.requestSubmit === 'function') {
      $form.requestSubmit();
    } else {
      $form.dispatchEvent(new Event('submit', { cancelable: true }));
    }
  });

  $form.addEventListener('submit', (evt) => {
    evt.preventDefault();
    // An empty input falls back to a random sample sentence.
    const text = $input.value.length > 0 ? $input.value : DEFAULT_NLP_TEXT();
    updateURL(text);
    coref.parse(text);
  });

  $checkbox.addEventListener('click', () => {
    toggleDebug();
  });

  // Debug mode is on unless it was explicitly stored as "false".
  if (window.localStorage.getItem('debug') !== 'false') {
    toggleDebug();
  }
});