// Spaces: Running — page-header residue from where this file was captured; not part of the program.
/** Category tag of a detected mention (the value carried by `Mention.type`). */
declare type MentionType = "PRONOMINAL" | "NOMINAL" | "PROPER" | "LIST";
/**
 * One mention (a span of text that may take part in a coreference link).
 */
declare interface Mention {
  /** Identifier of the mention; used as the key in the score tables of a response. */
  index: number;
  /** Character offset of the first character of the span in the cleaned text. */
  start: number;
  /** Character offset just past the span (exclusive), so `end - start === text.length`. */
  end: number;
  /** Index of the first token of the span. */
  startToken: number;
  /** Token index just past the span (exclusive, judging by the sample data). */
  endToken: number;
  /** Index of the utterance containing the mention (always 0 in the bundled samples). */
  utterance: number;
  type: MentionType;
  /** The surface text of the span. */
  text: string;
}
/**
 * A resolved coreference: a mention and the text it was resolved to.
 */
declare interface Coreference {
  /** The mention as written in the input (e.g. "She"). */
  original: string;
  /** The text of the mention it refers to (e.g. "my mother"). */
  resolved: string;
}
/**
 * Span-level embedding inputs attached to a mention.
 * NOTE(review): values are typed as strings; presumably textual spans or
 * serialized vectors — confirm against the service that produces them.
 */
declare interface SpansEmbeddings {
  Doc: string;
  Mention: string[];
  MentionLeft: string[];
  MentionRight: string[];
  Sentence: string[];
}
/**
 * Word-level embedding inputs attached to a mention: selected words of the
 * mention itself plus its two neighbours on each side.
 * NOTE(review): values are typed as strings; presumably the words themselves —
 * confirm against the service that produces them.
 */
declare interface WordsEmbeddings {
  MentionFirstWord: string;
  MentionHead: string;
  MentionLastWord: string;
  MentionRootHead: string;
  NextWord: string;
  PreviousWord: string;
  SecondNextWord: string;
  SecondPreviousWord: string;
}
/**
 * Hand-crafted features describing a single mention.
 */
declare interface MentionFeatures {
  MentionLength: number;
  MentionNormLocation: number;
  /** Mention category as a plain string (not narrowed to `MentionType` here). */
  MentionType: string;
  /** Numeric flag (presumably 0/1 — TODO confirm). */
  IsMentionNested: number;
  /** Optional; may be absent or explicitly `null`. */
  DocGenre?: string | null;
}
/**
 * Hand-crafted features describing a pair of mentions (a candidate
 * antecedent and a mention), including the per-mention features of both.
 * NOTE(review): the match/agreement fields are numeric, presumably 0/1 flags —
 * confirm against the service that produces them.
 */
declare interface MentionsPairFeatures {
  SameSpeaker: number;
  AntMatchMentionSpeaker: number;
  MentionMatchSpeaker: number;
  HeadsAgree: number;
  ExactStringMatch: number;
  RelaxedStringMatch: number;
  SentenceDistance: number;
  MentionDistance: number;
  Overlapping: number;
  M1Features: MentionFeatures;
  M2Features: MentionFeatures;
  /** Required here (unlike on `MentionFeatures`), but may be `null`. */
  DocGenre: string | null;
}
/**
 * Everything recorded for one mention on its own: its features together with
 * its span-level and word-level embedding inputs.
 */
declare interface SingleFeatures {
  features: MentionFeatures;
  spansEmbeddings: SpansEmbeddings;
  wordsEmbeddings: WordsEmbeddings;
}
/**
 * Everything recorded for a (antecedent, mention) pair: the pair features plus
 * the span-level and word-level embedding inputs of each side.
 */
declare interface PairFeatures {
  pairFeatures: MentionsPairFeatures;
  antecedentSpansEmbeddings: SpansEmbeddings;
  antecedentWordsEmbeddings: WordsEmbeddings;
  mentionSpansEmbeddings: SpansEmbeddings;
  mentionWordsEmbeddings: WordsEmbeddings;
}
/**
 * Result of running coreference resolution on one piece of text.
 */
declare interface CorefResponse {
  /** Cleaned input text; the mentions' `start`/`end` offsets index into this string. */
  cleanedText: string;
  /** The text after resolved mentions have been replaced by what they refer to. */
  corefResText: string;
  coreferences: Coreference[];
  mentions: Mention[];
  /**
   * Is this mention likely to be a single mention (w/o any corefs).
   * `id` is a Mention's `index`. May be `null` (it is for the first mention
   * in the bundled samples).
   */
  singleScores: { [id: number]: number | null };
  /**
   * Pair-wise score, in `{ from: { to: ... } }` format. Non-directed arcs.
   * Both keys are Mention `index` values.
   *
   * Single scores are to be compared to the set of pairScores (for the same mention).
   * If it's higher than every pair score, it's a single mention.
   */
  pairScores: { [from: number]: { [to: number]: number } };
  /** Cleaned version of the context. */
  cleanedContext: string;
  // singleFeatures: { [id: number]: SingleFeatures | null };
  // pairFeatures: { [id: number]: { [id: number]: PairFeatures } };
  isResolved: boolean;
}
/**
 * Canned coreference results, keyed by the exact input sentence.
 *
 * While this map is non-empty, `Coref.parse` serves responses from it instead
 * of calling the endpoint (falling back to the first entry for unknown text).
 * Mention `start`/`end` values are character offsets into `cleanedText`.
 */
const COREF_HARDCODED_OUTPUTS = new Map<string, CorefResponse>([
  [
    `I love my father and my mother. They work hard. She is always nice but he is sometimes rude.`,
    {
      cleanedText:
        "I love my father and my mother. They work hard. She is always nice but he is sometimes rude.",
      corefResText:
        "I love my father and my mother. They work hard. my mother is always nice but he is sometimes rude.",
      coreferences: [
        {
          resolved: "my mother",
          original: "She",
        },
      ],
      mentions: [
        {
          index: 0,
          type: "PRONOMINAL",
          end: 1,
          start: 0,
          startToken: 0,
          text: "I",
          utterance: 0,
          endToken: 1,
        },
        {
          index: 1,
          type: "PRONOMINAL",
          end: 9,
          start: 7,
          startToken: 2,
          text: "my",
          utterance: 0,
          endToken: 3,
        },
        {
          index: 2,
          type: "LIST",
          end: 30,
          start: 7,
          startToken: 2,
          text: "my father and my mother",
          utterance: 0,
          endToken: 7,
        },
        {
          index: 3,
          type: "NOMINAL",
          end: 16,
          start: 7,
          startToken: 2,
          text: "my father",
          utterance: 0,
          endToken: 4,
        },
        {
          index: 4,
          type: "PRONOMINAL",
          end: 23,
          start: 21,
          startToken: 5,
          text: "my",
          utterance: 0,
          endToken: 6,
        },
        {
          index: 5,
          type: "NOMINAL",
          end: 30,
          start: 21,
          startToken: 5,
          text: "my mother",
          utterance: 0,
          endToken: 7,
        },
        {
          index: 6,
          type: "PRONOMINAL",
          end: 36,
          start: 32,
          startToken: 8,
          text: "They",
          utterance: 0,
          endToken: 9,
        },
        {
          index: 7,
          type: "PRONOMINAL",
          end: 51,
          start: 48,
          startToken: 12,
          text: "She",
          utterance: 0,
          endToken: 13,
        },
        {
          index: 8,
          type: "PRONOMINAL",
          end: 73,
          start: 71,
          startToken: 17,
          text: "he",
          utterance: 0,
          endToken: 18,
        },
      ],
      singleScores: {
        "0": null,
        "1": -0.5316791573216993,
        "2": 1.5312658152183065,
        "3": 1.555544240226482,
        "4": -1.3024868053674725,
        "5": 1.8934082334098847,
        "6": -0.9070692483062055,
        "7": 0.052800267274213275,
        "8": -0.14679673122527748,
      },
      pairScores: {
        "0": {},
        "1": {
          "0": 15.268471128411697,
        },
        "2": {
          "0": -4.4230603182609896,
          "1": -3.382446088190441,
        },
        "3": {
          "0": -4.2304546215104555,
          "1": -3.5681677896088786,
          "2": -1.9470202037608262,
        },
        "4": {
          "0": 11.654522570777317,
          "1": 13.455601870537567,
          "2": -3.218918301345336,
          "3": -3.882381584104524,
        },
        "5": {
          "0": -3.9280501278811983,
          "1": -4.426880262361277,
          "2": -1.7714693884323367,
          "3": -2.722532370602323,
          "4": -3.290164176455163,
        },
        "6": {
          "0": -4.492101447800931,
          "1": -4.636904674331316,
          "2": 3.1158072056943666,
          "3": -2.7375757747875573,
          "4": -4.689981185699828,
          "5": -2.6728186848475537,
        },
        "7": {
          "0": -3.197215354228037,
          "1": -3.538538702704479,
          "2": -0.02408947507481729,
          "3": 0.3052410603657605,
          "4": -3.519641485034609,
          "5": 1.8101046215415115,
          "6": -3.1353342036917917,
        },
        "8": {
          "0": -0.08532621450323319,
          "1": 5.371002989344198,
          "2": -1.4091179987286686,
          "3": 3.152921411948177,
          "4": 2.268706305216419,
          "5": -2.340743897439996,
          "6": -2.8835496283480597,
          "7": -3.4832126005315334,
        },
      },
      cleanedContext: "",
      isResolved: true,
    },
  ],
  [
    `My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.`,
    {
      cleanedText:
        "My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.",
      corefResText:
        "My sister is swimming with my sister classmates. her classmates are not bad, but my sister is better. I love watching my sister swim.",
      coreferences: [
        {
          resolved: "My sister",
          original: "she",
        },
        {
          resolved: "My sister",
          original: "her",
        },
        {
          resolved: "her classmates",
          original: "They",
        },
        {
          resolved: "My sister",
          original: "her",
        },
      ],
      mentions: [
        {
          index: 0,
          type: "PRONOMINAL",
          end: 2,
          start: 0,
          startToken: 0,
          text: "My",
          utterance: 0,
          endToken: 1,
        },
        {
          index: 1,
          type: "NOMINAL",
          end: 9,
          start: 0,
          startToken: 0,
          text: "My sister",
          utterance: 0,
          endToken: 2,
        },
        {
          index: 2,
          type: "PRONOMINAL",
          end: 30,
          start: 27,
          startToken: 5,
          text: "her",
          utterance: 0,
          endToken: 6,
        },
        {
          index: 3,
          type: "NOMINAL",
          end: 41,
          start: 27,
          startToken: 5,
          text: "her classmates",
          utterance: 0,
          endToken: 7,
        },
        {
          index: 4,
          type: "PRONOMINAL",
          end: 47,
          start: 43,
          startToken: 8,
          text: "They",
          utterance: 0,
          endToken: 9,
        },
        {
          index: 5,
          type: "PRONOMINAL",
          end: 68,
          start: 65,
          startToken: 14,
          text: "she",
          utterance: 0,
          endToken: 15,
        },
        {
          index: 6,
          type: "PRONOMINAL",
          end: 81,
          start: 80,
          startToken: 18,
          text: "I",
          utterance: 0,
          endToken: 19,
        },
        {
          index: 7,
          type: "PRONOMINAL",
          end: 99,
          start: 96,
          startToken: 21,
          text: "her",
          utterance: 0,
          endToken: 22,
        },
      ],
      singleScores: {
        "0": null,
        "1": 1.609437735243254,
        "2": -1.1017402175324822,
        "3": 1.4347901008486401,
        "4": -0.02895837171142801,
        "5": -0.4266623545401909,
        "6": 0.009921976322164627,
        "7": -1.8629830475049451,
      },
      pairScores: {
        "0": {},
        "1": {
          "0": -2.2413815226574703,
        },
        "2": {
          "0": -2.409825572927252,
          "1": 5.707592445811339,
        },
        "3": {
          "0": -3.0653216162902854,
          "1": -1.6904548462117184,
          "2": -1.8322836987315447,
        },
        "4": {
          "0": -3.688547511940379,
          "1": -2.0587007889253717,
          "2": -3.370481889890517,
          "3": 2.67729831167075,
        },
        "5": {
          "0": -2.6457134524861243,
          "1": 8.41568336157475,
          "2": 5.457479617210075,
          "3": 0.5541345662624297,
          "4": -2.952959651402653,
        },
        "6": {
          "0": 6.483305186430136,
          "1": -2.5309543937239427,
          "2": -2.4954945953746566,
          "3": -2.812183970273315,
          "4": -2.998588381716906,
          "5": -2.2723718581884205,
        },
        "7": {
          "0": -2.9154581227140457,
          "1": 9.352887851205328,
          "2": 9.844018411095597,
          "3": 1.8138255060465474,
          "4": -3.3396902374034765,
          "5": 10.035481487601054,
          "6": -3.0660799723685312,
        },
      },
      cleanedContext: "",
      isResolved: true,
    },
  ],
  [
    `My mother's name is Sasha, she likes dogs.`,
    {
      cleanedText: "My mother's name is Sasha, she likes dogs.",
      corefResText: "My mother's name is Sasha, my mother likes dogs.",
      coreferences: [
        {
          resolved: "My mother",
          original: "she",
        },
      ],
      mentions: [
        {
          index: 0,
          type: "PRONOMINAL",
          end: 2,
          start: 0,
          startToken: 0,
          text: "My",
          utterance: 0,
          endToken: 1,
        },
        {
          index: 1,
          type: "NOMINAL",
          end: 9,
          start: 0,
          startToken: 0,
          text: "My mother",
          utterance: 0,
          endToken: 2,
        },
        {
          index: 2,
          type: "NOMINAL",
          end: 16,
          start: 0,
          startToken: 0,
          text: "My mother's name",
          utterance: 0,
          endToken: 4,
        },
        {
          index: 3,
          type: "PROPER",
          end: 25,
          start: 20,
          startToken: 5,
          text: "Sasha",
          utterance: 0,
          endToken: 6,
        },
        {
          index: 4,
          type: "PRONOMINAL",
          end: 30,
          start: 27,
          startToken: 7,
          text: "she",
          utterance: 0,
          endToken: 8,
        },
        {
          index: 5,
          type: "NOMINAL",
          end: 41,
          start: 37,
          startToken: 9,
          text: "dogs",
          utterance: 0,
          endToken: 10,
        },
      ],
      singleScores: {
        "0": null,
        "1": 1.9246201814037063,
        "2": 1.3833144431588633,
        "3": 1.8293318485967687,
        "4": 0.11171655922904344,
        "5": 1.8179855402495786,
      },
      pairScores: {
        "0": {},
        "1": {
          "0": -2.021441708531068,
        },
        "2": {
          "0": -2.4538823419832134,
          "1": -1.5272053058795838,
        },
        "3": {
          "0": -2.2864554131219212,
          "1": -1.5158990748985923,
          "2": -1.5676019384720228,
        },
        "4": {
          "0": -1.077294938181586,
          "1": 5.190687831349847,
          "2": 1.3862198517098907,
          "3": 1.5871185522743856,
        },
        "5": {
          "0": -2.957565366582327,
          "1": -1.572206989880445,
          "2": -1.5136893865248766,
          "3": -2.295173505354227,
          "4": -2.2454728131610056,
        },
      },
      cleanedContext: "",
      isResolved: true,
    },
  ],
]);
/**
 * Client-side driver for the coreference demo: obtains a `CorefResponse` for a
 * piece of text and draws it as highlighted mentions plus SVG score arrows.
 *
 * `container` and `svgContainer` are not assigned by the constructor; the
 * caller sets them before anything can be drawn. While either is missing,
 * `render` and `svgResize` do nothing visible.
 */
class Coref {
  endpoint: string;
  /** Invoked at the very beginning of every `parse` call. */
  onStart = () => {};
  /** Invoked once a response is available, right before it is rendered. */
  onSuccess = () => {};
  container?: HTMLElement;
  svgContainer?: SVGSVGElement;

  /**
   * @param endpoint URL queried by `parse` (the text is appended as `?text=`).
   * @param opts     Optional lifecycle callbacks.
   */
  constructor(endpoint: string, opts: { onStart?: () => void, onSuccess?: () => void } = {}) {
    this.endpoint = endpoint;
    if (opts.onStart) {
      this.onStart = opts.onStart;
    }
    if (opts.onSuccess) {
      this.onSuccess = opts.onSuccess;
    }
    // Wrap the call in an arrow function: passing `this.svgResize` directly
    // would detach the method from the instance, so `this` inside it would no
    // longer be this Coref when the event fires.
    window.addEventListener('resize', () => this.svgResize());
  }

  /** Sizes the SVG overlay to the full scrollable area of the container. */
  svgResize() {
    if (!this.container || !this.svgContainer) { return ; }
    this.svgContainer.setAttribute('width', `${this.container.scrollWidth}`); /// Caution: not offsetWidth.
    this.svgContainer.setAttribute('height', `${this.container.scrollHeight}`);
  }

  /**
   * Resolves coreferences for `text` and renders the result.
   *
   * While `COREF_HARDCODED_OUTPUTS` is non-empty the network is never used:
   * the canned response for `text` (or the first canned response when `text`
   * is unknown) is rendered after a 300 ms delay. Otherwise a GET request is
   * sent to `endpoint`; failures are logged to the console.
   */
  parse(text: string) {
    this.onStart();

    if (COREF_HARDCODED_OUTPUTS.size) {
      const output = COREF_HARDCODED_OUTPUTS.get(text) ?? [...COREF_HARDCODED_OUTPUTS.values()][0];
      setTimeout(() => {
        this.onSuccess();
        this.render(output);
      }, 300);
      return ;
    }

    const path = `${this.endpoint}?text=${encodeURIComponent(text)}`;
    const request = new XMLHttpRequest();
    request.open('GET', path);
    request.onload = () => {
      if (request.status >= 200 && request.status < 400) {
        this.onSuccess();
        let res: CorefResponse;
        try {
          res = JSON.parse(request.responseText);
        } catch (err) {
          // A 2xx/3xx answer with a non-JSON body must not escape as an
          // uncaught exception from the event handler.
          console.error('Error', request, err);
          return ;
        }
        this.render(res);
      }
      else {
        console.error('Error', request);
      }
    };
    // Network-level failures never reach `onload`.
    request.onerror = () => {
      console.error('Error', request);
    };
    request.send();
  }

  /**
   * Draws a response: the marked-up text goes into `container`, one arrow per
   * scored mention pair goes into `svgContainer`.
   *
   * Side effects: adds a `singleScore` property to every mention of `res`,
   * sets `SvgArrow.yArrows`, and exposes both containers on `window`.
   */
  render(res: CorefResponse) {
    const mentions = (<any>res).mentions; // We will sort them in Displacy
    for (const m of mentions) {
      // Let's add each mention's singleScore.
      // `??` rather than `||`: a score of exactly 0 is a real score; only a
      // null/missing one becomes undefined.
      m.singleScore = res.singleScores[m.index] ?? undefined;
    }
    const markup = Displacy.render(res.cleanedText, mentions);

    const container = this.container;
    const svg = this.svgContainer;
    if (!container || !svg) { return ; }

    container.innerHTML = `<div class="text">${markup}</div>`;
    /// SVG
    svg.textContent = ""; // Empty
    this.svgResize();
    // Kept for backward compatibility: the containers are reachable as globals.
    (<any>window).container = container;
    (<any>window).svgContainer = svg;

    /**
     * Arrows preparation
     */
    // Look inside our own container (the wrapper was written just above)
    // instead of taking the first `.container .text` of the whole document.
    const textBox = container.querySelector('.text');
    if (!textBox) { return ; }
    const endY = textBox.getBoundingClientRect().top
      - container.getBoundingClientRect().top
      - 2;
    SvgArrow.yArrows = endY;

    /**
     * Render arrows
     */
    for (const [fromKey, scores] of Object.entries(res.pairScores)) {
      const from = parseInt(fromKey, 10); /// Convert all string keys to ints...
      const markFrom = container.querySelector<HTMLElement>(`mark[data-index="${from}"]`);
      // Both values are the same for every arrow leaving `from`.
      const bestPairScore = Math.max(...Object.values(scores));
      const singleScore = res.singleScores[from];

      for (const [toKey, score] of Object.entries(scores)) {
        const to = parseInt(toKey, 10);
        const markTo = container.querySelector<HTMLElement>(`mark[data-index="${to}"]`);
        if (!markFrom || !markTo) {
          // Without both endpoints in the markup there is nothing to connect.
          console.warn('Coref: no <mark> found for pair', from, to);
          continue;
        }
        const arrow = new SvgArrow(container, markFrom, markTo, score);
        // Is this a resolved coref?
        if (score >= bestPairScore) {
          arrow.classNames.push('score-ok'); // Best pairwise score
          // Is it better than the singleScore? (A singleScore of 0 still counts.)
          if (singleScore != null && score >= singleScore) {
            arrow.classNames.push('score-best');
          }
        }
        svg.appendChild(arrow.generate());
      }
    }

    // Finally do a second pass to move all red/orange arrows to the top of the grey ones.
    svg.querySelectorAll('.displacy-arrow.score-ok').forEach((arrowEl) => {
      svg.appendChild(arrowEl);
    });
  }
}