// coref / js-src / Coref.ts
// Last commit (julien-c, HF staff): "Hardcode static outputs to get rid of API" (83140f2, verified)
/** Category label attached to every detected mention. */
declare type MentionType =
	| "PRONOMINAL"
	| "NOMINAL"
	| "PROPER"
	| "LIST";
/**
 * One mention (span of text that may take part in a coreference) found in the input.
 */
declare interface Mention {
	/** Identifier of the mention; used as the key in the score tables of a response. */
	index: number;
	/** Category of the mention. */
	type: MentionType;
	/** Surface text covered by the mention. */
	text: string;
	/** Utterance number the mention belongs to (always 0 in the hardcoded samples). */
	utterance: number;
	/** Character offset where the mention starts in the cleaned text. */
	start: number;
	/** Character offset where the mention ends (exclusive, judging by the hardcoded samples). */
	end: number;
	/** Index of the first token of the mention. */
	startToken: number;
	/** Index just past the last token of the mention (exclusive, judging by the hardcoded samples). */
	endToken: number;
}
/**
 * A single resolved coreference: a referring expression and the mention it was resolved to.
 */
declare interface Coreference {
	/** Text of the mention the reference was resolved to (e.g. "my mother"). */
	resolved: string;
	/** Text of the referring expression as it appears in the input (e.g. "She"). */
	original: string;
}
/**
 * Span-level embeddings attached to a mention.
 * NOTE(review): values are plain strings here; their encoding is not visible in this file.
 * Only referenced by feature types whose response fields are currently commented out.
 */
declare interface SpansEmbeddings {
	Doc: string;
	Sentence: string[];
	Mention: string[];
	MentionLeft: string[];
	MentionRight: string[];
}
/**
 * Word-level embeddings attached to a mention, one string per word position.
 * NOTE(review): the string encoding is not visible in this file — confirm with the API.
 */
declare interface WordsEmbeddings {
	// Words inside the mention.
	MentionFirstWord: string;
	MentionLastWord: string;
	MentionHead: string;
	MentionRootHead: string;
	// Words surrounding the mention.
	SecondPreviousWord: string;
	PreviousWord: string;
	NextWord: string;
	SecondNextWord: string;
}
/**
 * Scalar features describing a single mention.
 * NOTE(review): value ranges are not visible in this file.
 */
declare interface MentionFeatures {
	MentionType: string;
	MentionLength: number;
	MentionNormLocation: number;
	/** Numeric flag (typed as a number, not a boolean). */
	IsMentionNested: number;
	/** May be omitted or explicitly null. */
	DocGenre?: string | null;
}
/**
 * Features describing a pair of mentions, plus the per-mention features of both members.
 * NOTE(review): all comparisons are typed as numbers; which values mean "true" is not
 * visible in this file.
 */
declare interface MentionsPairFeatures {
	// Per-mention features of the two members of the pair.
	M1Features: MentionFeatures;
	M2Features: MentionFeatures;
	// Speaker-related features.
	SameSpeaker: number;
	AntMatchMentionSpeaker: number;
	MentionMatchSpeaker: number;
	// String / head comparisons.
	HeadsAgree: number;
	ExactStringMatch: number;
	RelaxedStringMatch: number;
	// Positional features.
	SentenceDistance: number;
	MentionDistance: number;
	Overlapping: number;
	/** Required here (unlike in `MentionFeatures`), but may be null. */
	DocGenre: string | null;
}
/**
 * Everything attached to one mention considered on its own: scalar features plus
 * span-level and word-level embeddings.
 */
declare interface SingleFeatures {
	/** Scalar features of the mention. */
	features: MentionFeatures;
	/** Word-level embeddings of the mention. */
	wordsEmbeddings: WordsEmbeddings;
	/** Span-level embeddings of the mention. */
	spansEmbeddings: SpansEmbeddings;
}
/**
 * Everything attached to a (mention, antecedent) pair: the pair-level features plus the
 * span-level and word-level embeddings of each member.
 */
declare interface PairFeatures {
	/** Features computed on the pair as a whole. */
	pairFeatures: MentionsPairFeatures;
	// Embeddings of the mention.
	mentionSpansEmbeddings: SpansEmbeddings;
	mentionWordsEmbeddings: WordsEmbeddings;
	// Embeddings of its candidate antecedent.
	antecedentSpansEmbeddings: SpansEmbeddings;
	antecedentWordsEmbeddings: WordsEmbeddings;
}
/**
 * Result of resolving coreferences in one text; this is what `Coref.render` consumes.
 */
declare interface CorefResponse {
	/** Cleaned version of the input text; the string handed to the renderer. */
	cleanedText: string;
	/** Cleaned version of the context. */
	cleanedContext: string;
	/** The text with referring expressions replaced by what they were resolved to. */
	corefResText: string;
	isResolved: boolean;
	/** Resolved (original, resolved) pairs. */
	coreferences: Coreference[];
	/** Every mention detected in the text. */
	mentions: Mention[];
	/**
	 * Is this mention likely to be a single mention (w/o any corefs)?
	 * The key is a Mention's `index`.
	 *
	 * Single scores are to be compared to the set of `pairScores` for the same mention:
	 * if the single score is higher than every pair score, it's a single mention.
	 */
	singleScores: Record<number, number | null>;
	/** Pair-wise score, in `{ from: { to: ... } }` format. Non-directed arcs. */
	pairScores: Record<number, Record<number, number>>;
	// singleFeatures: { [id: number]: SingleFeatures | null };
	// pairFeatures: { [id: number]: { [id: number]: PairFeatures } };
}
/**
 * Pre-computed responses, keyed by the exact input text.
 *
 * While this map is non-empty, `Coref.parse` serves its entries instead of querying the
 * endpoint; a text that is not a key falls back to the first entry.
 *
 * In each entry, `singleScores` and `pairScores` are keyed by mention `index`, and
 * `pairScores[i]` only lists mentions with a smaller index than `i`.
 */
const COREF_HARDCODED_OUTPUTS = new Map<string, CorefResponse>([
// Sample 1: parents ("They" / "She" / "he").
[
`I love my father and my mother. They work hard. She is always nice but he is sometimes rude.`,
{
cleanedText:
"I love my father and my mother. They work hard. She is always nice but he is sometimes rude.",
corefResText:
"I love my father and my mother. They work hard. my mother is always nice but he is sometimes rude.",
coreferences: [
{
resolved: "my mother",
original: "She",
},
],
mentions: [
{
index: 0,
type: "PRONOMINAL",
end: 1,
start: 0,
startToken: 0,
text: "I",
utterance: 0,
endToken: 1,
},
{
index: 1,
type: "PRONOMINAL",
end: 9,
start: 7,
startToken: 2,
text: "my",
utterance: 0,
endToken: 3,
},
{
index: 2,
type: "LIST",
end: 30,
start: 7,
startToken: 2,
text: "my father and my mother",
utterance: 0,
endToken: 7,
},
{
index: 3,
type: "NOMINAL",
end: 16,
start: 7,
startToken: 2,
text: "my father",
utterance: 0,
endToken: 4,
},
{
index: 4,
type: "PRONOMINAL",
end: 23,
start: 21,
startToken: 5,
text: "my",
utterance: 0,
endToken: 6,
},
{
index: 5,
type: "NOMINAL",
end: 30,
start: 21,
startToken: 5,
text: "my mother",
utterance: 0,
endToken: 7,
},
{
index: 6,
type: "PRONOMINAL",
end: 36,
start: 32,
startToken: 8,
text: "They",
utterance: 0,
endToken: 9,
},
{
index: 7,
type: "PRONOMINAL",
end: 51,
start: 48,
startToken: 12,
text: "She",
utterance: 0,
endToken: 13,
},
{
index: 8,
type: "PRONOMINAL",
end: 73,
start: 71,
startToken: 17,
text: "he",
utterance: 0,
endToken: 18,
},
],
singleScores: {
"0": null,
"1": -0.5316791573216993,
"2": 1.5312658152183065,
"3": 1.555544240226482,
"4": -1.3024868053674725,
"5": 1.8934082334098847,
"6": -0.9070692483062055,
"7": 0.052800267274213275,
"8": -0.14679673122527748,
},
pairScores: {
"0": {},
"1": {
"0": 15.268471128411697,
},
"2": {
"0": -4.4230603182609896,
"1": -3.382446088190441,
},
"3": {
"0": -4.2304546215104555,
"1": -3.5681677896088786,
"2": -1.9470202037608262,
},
"4": {
"0": 11.654522570777317,
"1": 13.455601870537567,
"2": -3.218918301345336,
"3": -3.882381584104524,
},
"5": {
"0": -3.9280501278811983,
"1": -4.426880262361277,
"2": -1.7714693884323367,
"3": -2.722532370602323,
"4": -3.290164176455163,
},
"6": {
"0": -4.492101447800931,
"1": -4.636904674331316,
"2": 3.1158072056943666,
"3": -2.7375757747875573,
"4": -4.689981185699828,
"5": -2.6728186848475537,
},
"7": {
"0": -3.197215354228037,
"1": -3.538538702704479,
"2": -0.02408947507481729,
"3": 0.3052410603657605,
"4": -3.519641485034609,
"5": 1.8101046215415115,
"6": -3.1353342036917917,
},
"8": {
"0": -0.08532621450323319,
"1": 5.371002989344198,
"2": -1.4091179987286686,
"3": 3.152921411948177,
"4": 2.268706305216419,
"5": -2.340743897439996,
"6": -2.8835496283480597,
"7": -3.4832126005315334,
},
},
cleanedContext: "",
isResolved: true,
},
],
// Sample 2: sister and classmates ("her" / "They" / "she").
[
`My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.`,
{
cleanedText:
"My sister is swimming with her classmates. They are not bad, but she is better. I love watching her swim.",
corefResText:
"My sister is swimming with my sister classmates. her classmates are not bad, but my sister is better. I love watching my sister swim.",
coreferences: [
{
resolved: "My sister",
original: "she",
},
{
resolved: "My sister",
original: "her",
},
{
resolved: "her classmates",
original: "They",
},
{
resolved: "My sister",
original: "her",
},
],
mentions: [
{
index: 0,
type: "PRONOMINAL",
end: 2,
start: 0,
startToken: 0,
text: "My",
utterance: 0,
endToken: 1,
},
{
index: 1,
type: "NOMINAL",
end: 9,
start: 0,
startToken: 0,
text: "My sister",
utterance: 0,
endToken: 2,
},
{
index: 2,
type: "PRONOMINAL",
end: 30,
start: 27,
startToken: 5,
text: "her",
utterance: 0,
endToken: 6,
},
{
index: 3,
type: "NOMINAL",
end: 41,
start: 27,
startToken: 5,
text: "her classmates",
utterance: 0,
endToken: 7,
},
{
index: 4,
type: "PRONOMINAL",
end: 47,
start: 43,
startToken: 8,
text: "They",
utterance: 0,
endToken: 9,
},
{
index: 5,
type: "PRONOMINAL",
end: 68,
start: 65,
startToken: 14,
text: "she",
utterance: 0,
endToken: 15,
},
{
index: 6,
type: "PRONOMINAL",
end: 81,
start: 80,
startToken: 18,
text: "I",
utterance: 0,
endToken: 19,
},
{
index: 7,
type: "PRONOMINAL",
end: 99,
start: 96,
startToken: 21,
text: "her",
utterance: 0,
endToken: 22,
},
],
singleScores: {
"0": null,
"1": 1.609437735243254,
"2": -1.1017402175324822,
"3": 1.4347901008486401,
"4": -0.02895837171142801,
"5": -0.4266623545401909,
"6": 0.009921976322164627,
"7": -1.8629830475049451,
},
pairScores: {
"0": {},
"1": {
"0": -2.2413815226574703,
},
"2": {
"0": -2.409825572927252,
"1": 5.707592445811339,
},
"3": {
"0": -3.0653216162902854,
"1": -1.6904548462117184,
"2": -1.8322836987315447,
},
"4": {
"0": -3.688547511940379,
"1": -2.0587007889253717,
"2": -3.370481889890517,
"3": 2.67729831167075,
},
"5": {
"0": -2.6457134524861243,
"1": 8.41568336157475,
"2": 5.457479617210075,
"3": 0.5541345662624297,
"4": -2.952959651402653,
},
"6": {
"0": 6.483305186430136,
"1": -2.5309543937239427,
"2": -2.4954945953746566,
"3": -2.812183970273315,
"4": -2.998588381716906,
"5": -2.2723718581884205,
},
"7": {
"0": -2.9154581227140457,
"1": 9.352887851205328,
"2": 9.844018411095597,
"3": 1.8138255060465474,
"4": -3.3396902374034765,
"5": 10.035481487601054,
"6": -3.0660799723685312,
},
},
cleanedContext: "",
isResolved: true,
},
],
// Sample 3: possessive and proper noun ("My mother's name is Sasha").
[
`My mother's name is Sasha, she likes dogs.`,
{
cleanedText: "My mother's name is Sasha, she likes dogs.",
corefResText: "My mother's name is Sasha, my mother likes dogs.",
coreferences: [
{
resolved: "My mother",
original: "she",
},
],
mentions: [
{
index: 0,
type: "PRONOMINAL",
end: 2,
start: 0,
startToken: 0,
text: "My",
utterance: 0,
endToken: 1,
},
{
index: 1,
type: "NOMINAL",
end: 9,
start: 0,
startToken: 0,
text: "My mother",
utterance: 0,
endToken: 2,
},
{
index: 2,
type: "NOMINAL",
end: 16,
start: 0,
startToken: 0,
text: "My mother's name",
utterance: 0,
endToken: 4,
},
{
index: 3,
type: "PROPER",
end: 25,
start: 20,
startToken: 5,
text: "Sasha",
utterance: 0,
endToken: 6,
},
{
index: 4,
type: "PRONOMINAL",
end: 30,
start: 27,
startToken: 7,
text: "she",
utterance: 0,
endToken: 8,
},
{
index: 5,
type: "NOMINAL",
end: 41,
start: 37,
startToken: 9,
text: "dogs",
utterance: 0,
endToken: 10,
},
],
singleScores: {
"0": null,
"1": 1.9246201814037063,
"2": 1.3833144431588633,
"3": 1.8293318485967687,
"4": 0.11171655922904344,
"5": 1.8179855402495786,
},
pairScores: {
"0": {},
"1": {
"0": -2.021441708531068,
},
"2": {
"0": -2.4538823419832134,
"1": -1.5272053058795838,
},
"3": {
"0": -2.2864554131219212,
"1": -1.5158990748985923,
"2": -1.5676019384720228,
},
"4": {
"0": -1.077294938181586,
"1": 5.190687831349847,
"2": 1.3862198517098907,
"3": 1.5871185522743856,
},
"5": {
"0": -2.957565366582327,
"1": -1.572206989880445,
"2": -1.5136893865248766,
"3": -2.295173505354227,
"4": -2.2454728131610056,
},
},
cleanedContext: "",
isResolved: true,
},
],
]);
/**
 * Front-end driver of the coreference demo.
 *
 * Obtains a `CorefResponse` for a text (from the hardcoded outputs when there are any,
 * otherwise from `endpoint`) and renders it: marked-up text goes into `container`, and
 * one SVG arrow per scored mention pair goes into `svgContainer`.
 *
 * `container` and `svgContainer` must be assigned by the caller before `render` can draw
 * anything.
 */
class Coref {
	endpoint: string;
	onStart = () => {};
	onSuccess = () => {};
	container?: HTMLElement;
	svgContainer?: SVGSVGElement;

	/**
	 * @param endpoint URL queried with `?text=...` when no hardcoded output is available.
	 * @param opts Optional hooks: `onStart` runs when `parse` begins, `onSuccess` right
	 *   before a response is rendered.
	 */
	constructor(endpoint: string, opts: { onStart?: () => void, onSuccess?: () => void } = {}) {
		this.endpoint = endpoint;
		if (opts.onStart) {
			this.onStart = opts.onStart;
		}
		if (opts.onSuccess) {
			this.onSuccess = opts.onSuccess;
		}
		// Wrap the call so `this` is the Coref instance: an unbound method passed to
		// addEventListener runs with `this` set to the event target (`window`).
		window.addEventListener('resize', () => this.svgResize());
	}

	/** Sizes the SVG overlay to the full scrollable area of the container. */
	svgResize() {
		if (!this.container || !this.svgContainer) { return ; }
		this.svgContainer.setAttribute('width', `${this.container.scrollWidth}`); /// Caution: not offsetWidth.
		this.svgContainer.setAttribute('height', `${this.container.scrollHeight}`);
	}

	/**
	 * Resolves `text` and renders the result.
	 *
	 * While hardcoded outputs exist they are used instead of the network (after a 300 ms
	 * delay); a text without a hardcoded output falls back to the first one.
	 */
	parse(text: string) {
		this.onStart();

		if (COREF_HARDCODED_OUTPUTS.size) {
			const output = COREF_HARDCODED_OUTPUTS.get(text) ?? [...COREF_HARDCODED_OUTPUTS.values()][0];
			setTimeout(() => {
				this.onSuccess();
				this.render(output);
			}, 300);
			return ;
		}

		const path = `${this.endpoint}?text=${encodeURIComponent(text)}`;
		const request = new XMLHttpRequest();
		request.open('GET', path);
		request.onload = () => {
			if (request.status >= 200 && request.status < 400) {
				this.onSuccess();
				const res: CorefResponse = JSON.parse(request.responseText);
				this.render(res);
			}
			else {
				console.error('Error', request);
			}
		};
		// Network-level failures never reach `onload`; report them the same way.
		request.onerror = () => {
			console.error('Error', request);
		};
		request.send();
	}

	/**
	 * Renders a response: marked-up text plus one arrow per entry of `res.pairScores`.
	 *
	 * Side effect: every object in `res.mentions` gets a `singleScore` property.
	 * Nothing is drawn when `container` or `svgContainer` is not set.
	 */
	render(res: CorefResponse) {
		const mentions = (<any>res).mentions; // We will sort them in Displacy
		for (const m of mentions) {
			// Let's add each mention's singleScore (null becomes undefined; 0 is a valid score).
			m.singleScore = res.singleScores[m.index] ?? undefined;
		}
		const markup = Displacy.render(res.cleanedText, mentions);
		if (!this.container || !this.svgContainer) { return ; }
		const container = this.container;
		const svgContainer = this.svgContainer;

		container.innerHTML = `<div class="text">${markup}</div>`;
		/// SVG
		svgContainer.textContent = ""; // Empty
		this.svgResize();
		// Still exposed as globals in case external scripts read them — TODO confirm and remove.
		(<any>window).container = container;
		(<any>window).svgContainer = svgContainer;

		/**
		 * Arrows preparation
		 */
		const textElement = container.querySelector('.text');
		if (!textElement) { return ; }
		const endY = textElement.getBoundingClientRect().top
			- container.getBoundingClientRect().top
			- 2;
		SvgArrow.yArrows = endY;

		/**
		 * Render arrows
		 */
		for (const [__from, scores] of Object.entries(res.pairScores)) {
			const from = parseInt(__from, 10); /// Convert all string keys to ints...
			const pairs = Object.entries(scores);
			if (pairs.length === 0) { continue; }
			// Best pairwise score for this mention, computed once rather than per arrow.
			const bestScore = Math.max(...pairs.map(([, value]) => value));
			const singleScore = res.singleScores[from];

			for (const [__to, score] of pairs) {
				const to = parseInt(__to, 10);
				// Positions:
				const markFrom = container.querySelector<HTMLElement>(`mark[data-index="${from}"]`);
				const markTo = container.querySelector<HTMLElement>(`mark[data-index="${to}"]`);
				if (!markFrom || !markTo) { continue; } // No rendered mark to anchor the arrow to.

				const arrow = new SvgArrow(container, markFrom, markTo, score);
				// Is this a resolved coref?
				if (score >= bestScore) {
					arrow.classNames.push('score-ok'); // Best pairwise score
					// Is it better than the singleScore?
					if (singleScore != null && score >= singleScore) {
						arrow.classNames.push('score-best');
					}
				}
				svgContainer.appendChild(arrow.generate());
			}
		}

		// Finally do a second pass to move all red/orange arrows to the top of the grey ones.
		for (const arw of Array.from(svgContainer.querySelectorAll('.displacy-arrow.score-ok'))) {
			svgContainer.appendChild(arw);
		}
	}
}