bourdoiscatie
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -1160,27 +1160,35 @@ The following hyperparameters were used during training:
|
|
1160 |
|
1161 |
### NERmembert-large-4entities
|
1162 |
```
|
1163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1164 |
```
|
1165 |
|
1166 |
### multiconer
|
1167 |
-
|
1168 |
-
|
1169 |
title={{SemEval-2023 Task 2: Fine-grained Multilingual Named Entity Recognition (MultiCoNER 2)}},
|
1170 |
author={Fetahu, Besnik and Kar, Sudipta and Chen, Zhiyu and Rokhlenko, Oleg and Malmasi, Shervin},
|
1171 |
booktitle={Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)},
|
1172 |
year={2023},
|
1173 |
publisher={Association for Computational Linguistics}}
|
1174 |
|
1175 |
-
|
1176 |
title={{MultiCoNER v2: a Large Multilingual dataset for Fine-grained and Noisy Named Entity Recognition}},
|
1177 |
author={Fetahu, Besnik and Chen, Zhiyu and Kar, Sudipta and Rokhlenko, Oleg and Malmasi, Shervin},
|
1178 |
year={2023}}
|
1179 |
-
|
1180 |
|
1181 |
### multinerd
|
1182 |
-
|
1183 |
-
|
1184 |
title = "{M}ulti{NERD}: A Multilingual, Multi-Genre and Fine-Grained Dataset for Named Entity Recognition (and Disambiguation)",
|
1185 |
author = "Tedeschi, Simone and Navigli, Roberto",
|
1186 |
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2022",
|
@@ -1191,44 +1199,70 @@ TODO
|
|
1191 |
url = "https://aclanthology.org/2022.findings-naacl.60",
|
1192 |
doi = "10.18653/v1/2022.findings-naacl.60",
|
1193 |
pages = "801--812"}
|
|
|
1194 |
|
1195 |
### pii-masking-200k
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
|
1197 |
-
|
1198 |
-
|
1199 |
-
|
1200 |
-
|
1201 |
-
|
1202 |
-
|
1203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1204 |
|
1205 |
### wikiner
|
1206 |
-
|
1207 |
-
|
1208 |
-
title = {Learning multilingual named entity recognition from Wikipedia},
|
1209 |
-
journal = {Artificial Intelligence},
|
1210 |
-
volume = {194},
|
1211 |
-
pages = {151-175},
|
1212 |
-
year = {2013},
|
1213 |
-
note = {Artificial Intelligence, Wikipedia and Semi-Structured Resources},
|
1214 |
-
issn = {0004-3702},
|
1215 |
-
doi = {https://doi.org/10.1016/j.artint.2012.03.006},
|
1216 |
-
url = {https://www.sciencedirect.com/science/article/pii/S0004370212000276},
|
1217 |
-
author = {Joel Nothman and Nicky Ringland and Will Radford and Tara Murphy and James R. Curran}}
|
1218 |
-
|
1219 |
|
1220 |
### frenchNER_4entities
|
1221 |
```
|
1222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1223 |
```
|
1224 |
|
|
|
1225 |
### CamemBERT
|
1226 |
-
|
|
|
1227 |
title={CamemBERT: a Tasty French Language Model},
|
1228 |
author={Martin, Louis and Muller, Benjamin and Su{\'a}rez, Pedro Javier Ortiz and Dupont, Yoann and Romary, Laurent and de la Clergerie, {\'E}ric Villemonte and Seddah, Djam{\'e} and Sagot, Beno{\^\i}t},
|
1229 |
booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
|
1230 |
year={2020}}
|
1231 |
-
|
1232 |
|
1233 |
## License
|
1234 |
[cc-by-4.0](https://creativecommons.org/licenses/by/4.0/deed.en)
|
|
|
1160 |
|
1161 |
### NERmembert-large-4entities
|
1162 |
```
|
1163 |
+
@misc {NERmembert2024,
|
1164 |
+
author = { {BOURDOIS, Loïck} },
|
1165 |
+
organization = { {Centre Aquitain des Technologies de l'Information et Electroniques} },
|
1166 |
+
title = { NERmembert-large-4entities (Revision 1cd8be5) },
|
1167 |
+
year = 2024,
|
1168 |
+
url = { https://huggingface.co/CATIE-AQ/NERmembert-large-4entities },
|
1169 |
+
doi = { 10.57967/hf/1752 },
|
1170 |
+
publisher = { Hugging Face }
|
1171 |
+
}
|
1172 |
```
|
1173 |
|
1174 |
### multiconer
|
1175 |
+
```
|
1176 |
+
@inproceedings{multiconer2-report,
|
1177 |
title={{SemEval-2023 Task 2: Fine-grained Multilingual Named Entity Recognition (MultiCoNER 2)}},
|
1178 |
author={Fetahu, Besnik and Kar, Sudipta and Chen, Zhiyu and Rokhlenko, Oleg and Malmasi, Shervin},
|
1179 |
booktitle={Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)},
|
1180 |
year={2023},
|
1181 |
publisher={Association for Computational Linguistics}}
|
1182 |
|
1183 |
+
@article{multiconer2-data,
|
1184 |
title={{MultiCoNER v2: a Large Multilingual dataset for Fine-grained and Noisy Named Entity Recognition}},
|
1185 |
author={Fetahu, Besnik and Chen, Zhiyu and Kar, Sudipta and Rokhlenko, Oleg and Malmasi, Shervin},
|
1186 |
year={2023}}
|
1187 |
+
```
|
1188 |
|
1189 |
### multinerd
|
1190 |
+
```
|
1191 |
+
@inproceedings{tedeschi-navigli-2022-multinerd,
|
1192 |
title = "{M}ulti{NERD}: A Multilingual, Multi-Genre and Fine-Grained Dataset for Named Entity Recognition (and Disambiguation)",
|
1193 |
author = "Tedeschi, Simone and Navigli, Roberto",
|
1194 |
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2022",
|
|
|
1199 |
url = "https://aclanthology.org/2022.findings-naacl.60",
|
1200 |
doi = "10.18653/v1/2022.findings-naacl.60",
|
1201 |
pages = "801--812"}
|
1202 |
+
```
|
1203 |
|
1204 |
### pii-masking-200k
|
1205 |
+
```
|
1206 |
+
@misc {ai4privacy_2023,
|
1207 |
+
author = { {ai4Privacy} },
|
1208 |
+
title = { pii-masking-200k (Revision 1d4c0a1) },
|
1209 |
+
year = 2023,
|
1210 |
+
url = { https://huggingface.co/datasets/ai4privacy/pii-masking-200k },
|
1211 |
+
doi = { 10.57967/hf/1532 },
|
1212 |
+
publisher = { Hugging Face }}
|
1213 |
+
```
|
1214 |
|
1215 |
+
### wikiann
|
1216 |
+
```
|
1217 |
+
@inproceedings{rahimi-etal-2019-massively,
|
1218 |
+
title = "Massively Multilingual Transfer for {NER}",
|
1219 |
+
author = "Rahimi, Afshin and Li, Yuan and Cohn, Trevor",
|
1220 |
+
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
|
1221 |
+
month = jul,
|
1222 |
+
year = "2019",
|
1223 |
+
address = "Florence, Italy",
|
1224 |
+
publisher = "Association for Computational Linguistics",
|
1225 |
+
url = "https://www.aclweb.org/anthology/P19-1015",
|
1226 |
+
pages = "151--164"}
|
1227 |
+
```
|
1228 |
|
1229 |
### wikiner
|
1230 |
+
```
|
1231 |
+
@article{NOTHMAN2013151,
|
1232 |
+
title = {Learning multilingual named entity recognition from Wikipedia},
|
1233 |
+
journal = {Artificial Intelligence},
|
1234 |
+
volume = {194},
|
1235 |
+
pages = {151--175},
|
1236 |
+
year = {2013},
|
1237 |
+
note = {Artificial Intelligence, Wikipedia and Semi-Structured Resources},
|
1238 |
+
issn = {0004-3702},
|
1239 |
+
doi = {10.1016/j.artint.2012.03.006},
|
1240 |
+
url = {https://www.sciencedirect.com/science/article/pii/S0004370212000276},
|
1241 |
+
author = {Joel Nothman and Nicky Ringland and Will Radford and Tara Murphy and James R. Curran}}
|
1242 |
+
```
|
1243 |
|
1244 |
### frenchNER_4entities
|
1245 |
```
|
1246 |
+
@misc {frenchNER2024,
|
1247 |
+
author = { {BOURDOIS, Loïck} },
|
1248 |
+
organization = { {Centre Aquitain des Technologies de l'Information et Electroniques} },
|
1249 |
+
title = { frenchNER_4entities },
|
1250 |
+
year = 2024,
|
1251 |
+
url = { https://huggingface.co/CATIE-AQ/frenchNER_4entities },
|
1252 |
+
doi = { 10.57967/hf/1751 },
|
1253 |
+
publisher = { Hugging Face }
|
1254 |
+
}
|
1255 |
```
|
1256 |
|
1257 |
+
|
1258 |
### CamemBERT
|
1259 |
+
```
|
1260 |
+
@inproceedings{martin2020camembert,
|
1261 |
title={CamemBERT: a Tasty French Language Model},
|
1262 |
author={Martin, Louis and Muller, Benjamin and Su{\'a}rez, Pedro Javier Ortiz and Dupont, Yoann and Romary, Laurent and de la Clergerie, {\'E}ric Villemonte and Seddah, Djam{\'e} and Sagot, Beno{\^\i}t},
|
1263 |
booktitle={Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
|
1264 |
year={2020}}
|
1265 |
+
```
|
1266 |
|
1267 |
## License
|
1268 |
[cc-by-4.0](https://creativecommons.org/licenses/by/4.0/deed.en)
|