{
"lang":"en",
"name":"docusco_spacy_fc_trf",
"version":"1.0",
"description":"English pipeline for part-of-speech and rhetorical tagging.",
"author":"David Brown",
"email":"[email protected]",
"url":"https://browndw.github.io/docuscope-docs/",
"license":"MIT",
"spacy_version":">=3.4.3,<3.5.0",
"spacy_git_version":"Unknown",
"vectors":{
"width":0,
"vectors":0,
"keys":0,
"name":null
},
"labels":{
"transformer":[
],
"tagger":[
"APPGE",
"AT",
"AT1",
"BCL21",
"BCL22",
"CC",
"CCB",
"CS",
"CS21",
"CS22",
"CS31",
"CS32",
"CS33",
"CS41",
"CS42",
"CS43",
"CS44",
"CSA",
"CSN",
"CST",
"CSW",
"CSW31",
"CSW32",
"CSW33",
"DA",
"DA1",
"DA2",
"DAR",
"DAT",
"DB",
"DB2",
"DD",
"DD1",
"DD2",
"DDQ",
"DDQGE",
"DDQV",
"DDQV31",
"DDQV32",
"DDQV33",
"EX",
"FO",
"FU",
"FW",
"GE",
"IF",
"II",
"II21",
"II22",
"II31",
"II32",
"II33",
"II41",
"II42",
"II43",
"II44",
"IO",
"IW",
"JJ",
"JJ21",
"JJ22",
"JJ31",
"JJ32",
"JJ33",
"JJR",
"JJT",
"JK",
"MC",
"MC1",
"MC2",
"MC221",
"MC222",
"MCMC",
"MD",
"MF",
"ND1",
"NN",
"NN1",
"NN121",
"NN122",
"NN131",
"NN132",
"NN133",
"NN141",
"NN142",
"NN143",
"NN144",
"NN2",
"NN21",
"NN22",
"NN221",
"NN222",
"NN231",
"NN232",
"NN233",
"NN31",
"NN33",
"NNA",
"NNB",
"NNL1",
"NNL2",
"NNO",
"NNO2",
"NNT1",
"NNT2",
"NNU",
"NNU1",
"NNU2",
"NNU21",
"NNU22",
"NP",
"NP1",
"NP2",
"NPD1",
"NPD2",
"NPM1",
"NPM2",
"PN",
"PN1",
"PN121",
"PN122",
"PN21",
"PN22",
"PNQO",
"PNQS",
"PNQS31",
"PNQS32",
"PNQS33",
"PNQV",
"PNX1",
"PPGE",
"PPH1",
"PPHO1",
"PPHO2",
"PPHS1",
"PPHS2",
"PPIO1",
"PPIO2",
"PPIS1",
"PPIS2",
"PPX1",
"PPX121",
"PPX122",
"PPX2",
"PPX221",
"PPX222",
"PPY",
"RA",
"RA21",
"RA22",
"REX",
"REX21",
"REX22",
"REX41",
"REX42",
"REX43",
"REX44",
"RG",
"RG21",
"RG22",
"RGQ",
"RGQV",
"RGQV31",
"RGQV32",
"RGQV33",
"RGR",
"RGT",
"RL",
"RL21",
"RL22",
"RP",
"RPK",
"RR",
"RR21",
"RR22",
"RR31",
"RR32",
"RR33",
"RR41",
"RR42",
"RR43",
"RR44",
"RR51",
"RR52",
"RR53",
"RR54",
"RR55",
"RRQ",
"RRQV",
"RRQV31",
"RRQV32",
"RRQV33",
"RRR",
"RRT",
"RT",
"RT21",
"RT22",
"RT31",
"RT32",
"RT33",
"RT41",
"RT42",
"RT43",
"RT44",
"TO",
"UH",
"UH21",
"UH22",
"UH31",
"UH32",
"UH33",
"VB0",
"VBDR",
"VBDZ",
"VBG",
"VBI",
"VBM",
"VBN",
"VBR",
"VBZ",
"VD0",
"VDD",
"VDG",
"VDI",
"VDN",
"VDZ",
"VH0",
"VHD",
"VHG",
"VHI",
"VHN",
"VHZ",
"VM",
"VM21",
"VM22",
"VMK",
"VV0",
"VVD",
"VVG",
"VVGK",
"VVI",
"VVN",
"VVNK",
"VVZ",
"XX",
"Y",
"ZZ1",
"ZZ2",
"ZZ221",
"ZZ222"
],
"ner":[
"ActorsAbstractions",
"ActorsFirstPerson",
"ActorsPeople",
"ActorsPublicEntities",
"CitationAuthority",
"CitationControversy",
"CitationNeutral",
"ConfidenceHedged",
"ConfidenceHigh",
"OrganizationNarrative",
"OrganizationReasoning",
"PlanningFuture",
"PlanningStrategy",
"SentimentNegative",
"SentimentPositive",
"SignpostingAcademicWritingMoves",
"SignpostingMetadiscourse",
"StanceEmphatic",
"StanceModerated"
]
},
"pipeline":[
"transformer",
"tagger",
"ner"
],
"components":[
"transformer",
"tagger",
"ner"
],
"disabled":[
],
"performance":{
"tag_acc":0.9838874397,
"ents_f":0.8857562763,
"ents_p":0.8882993329,
"ents_r":0.8832277388,
"ents_per_type":{
"ActorsFirstPerson":{
"p":0.9099651568,
"r":0.9155798626,
"f":0.9127638753
},
"ActorsAbstractions":{
"p":0.8877091098,
"r":0.8873245538,
"f":0.8875167902
},
"SentimentPositive":{
"p":0.8424859908,
"r":0.8302208835,
"f":0.8363084703
},
"ActorsPeople":{
"p":0.9245164475,
"r":0.9314975583,
"f":0.9279938737
},
"SignpostingMetadiscourse":{
"p":0.9420821114,
"r":0.9216975493,
"f":0.9317783552
},
"OrganizationReasoning":{
"p":0.9065213002,
"r":0.8960287368,
"f":0.9012444801
},
"SentimentNegative":{
"p":0.826066254,
"r":0.8137773906,
"f":0.8198757764
},
"OrganizationNarrative":{
"p":0.8930481283,
"r":0.8644067797,
"f":0.8784940691
},
"ActorsPublicEntities":{
"p":0.9016686532,
"r":0.9000594884,
"f":0.9008633522
},
"ConfidenceHedged":{
"p":0.9001426534,
"r":0.9029765312,
"f":0.9015573653
},
"StanceEmphatic":{
"p":0.8945487042,
"r":0.9087607808,
"f":0.901598739
},
"ConfidenceHigh":{
"p":0.8826548067,
"r":0.8637428858,
"f":0.8730964467
},
"PlanningFuture":{
"p":0.8868312757,
"r":0.8990404673,
"f":0.8928941371
},
"SignpostingAcademicWritingMoves":{
"p":0.76987061,
"r":0.7642201835,
"f":0.7670349908
},
"PlanningStrategy":{
"p":0.8410391898,
"r":0.8340611354,
"f":0.8375356282
},
"CitationAuthority":{
"p":0.847715736,
"r":0.8199672668,
"f":0.8336106489
},
"StanceModerated":{
"p":0.8598014888,
"r":0.8828025478,
"f":0.87115022
},
"CitationNeutral":{
"p":0.8945945946,
"r":0.888590604,
"f":0.8915824916
},
"CitationControversy":{
"p":0.8925619835,
"r":0.8925619835,
"f":0.8925619835
}
},
"transformer_loss":23139.3976026688,
"tagger_loss":6522.5364216973,
"ner_loss":20483.8170848669
},
"requirements":[
"spacy-transformers>=1.1.8,<1.2.0"
]
}