Upload folder using huggingface_hub
Browse files- PKG-INFO +21 -0
- README.md +4 -3
- dummy_ner/__init__.py +17 -0
- dummy_ner/artifacts/config.cfg +33 -0
- dummy_ner/artifacts/meta.json +3 -0
- dummy_ner/artifacts/ner/embedding/embedding/config.json +25 -0
- dummy_ner/artifacts/ner/embedding/embedding/model.safetensors +3 -0
- dummy_ner/artifacts/ner/embedding/embedding/parameters.safetensors +3 -0
- dummy_ner/artifacts/ner/embedding/embedding/special_tokens_map.json +37 -0
- dummy_ner/artifacts/ner/embedding/embedding/tokenizer.json +0 -0
- dummy_ner/artifacts/ner/embedding/embedding/tokenizer_config.json +57 -0
- dummy_ner/artifacts/ner/embedding/embedding/vocab.txt +0 -0
- dummy_ner/artifacts/ner/embedding/parameters.safetensors +3 -0
- dummy_ner/artifacts/ner/parameters.safetensors +3 -0
- dummy_ner/artifacts/tokenizer +1 -0
- pyproject.toml +76 -0
PKG-INFO
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Metadata-Version: 2.1
|
2 |
+
Name: dummy-ner
|
3 |
+
Version: 0.1.0
|
4 |
+
Summary:
|
5 |
+
Author: Perceval Wajsburt
|
6 |
+
Author-email: [email protected]
|
7 |
+
Requires-Python: >=3.7.1,<4.0
|
8 |
+
Classifier: Programming Language :: Python :: 3
|
9 |
+
Classifier: Programming Language :: Python :: 3.8
|
10 |
+
Classifier: Programming Language :: Python :: 3.9
|
11 |
+
Classifier: Programming Language :: Python :: 3.10
|
12 |
+
Classifier: Programming Language :: Python :: 3.11
|
13 |
+
Classifier: Programming Language :: Python :: 3.12
|
14 |
+
Requires-Dist: edsnlp[ml] (>=0.11.2)
|
15 |
+
Requires-Dist: sentencepiece (>=0.1.96,<0.2.0)
|
16 |
+
Description-Content-Type: text/markdown
|
17 |
+
|
18 |
+
# Dummy EDS-NLP NER model
|
19 |
+
|
20 |
+
This model was trained on the DEFT 2020 Track 3 dataset, but its main purpose is to
|
21 |
+
test the integration of EDS-NLP with the Hugging Face Hub.
|
README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
1 |
+
# Dummy EDS-NLP NER model
|
2 |
+
|
3 |
+
This model was trained on the DEFT 2020 Track 3 dataset, but its main purpose is to
|
4 |
+
test the integration of EDS-NLP with the Hugging Face Hub.
|
dummy_ner/__init__.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# -----------------------------------------
|
3 |
+
# This section was autogenerated by edsnlp
|
4 |
+
# -----------------------------------------
|
5 |
+
|
6 |
+
import edsnlp
|
7 |
+
from pathlib import Path
|
8 |
+
from typing import Optional, Dict, Any
|
9 |
+
|
10 |
+
__version__ = None
|
11 |
+
|
12 |
+
def load(
|
13 |
+
overrides: Optional[Dict[str, Any]] = None,
|
14 |
+
) -> edsnlp.Pipeline:
|
15 |
+
artifacts_path = Path(__file__).parent / "artifacts"
|
16 |
+
model = edsnlp.load(artifacts_path, overrides=overrides)
|
17 |
+
return model
|
dummy_ner/artifacts/config.cfg
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[nlp]
|
2 |
+
lang = "eds"
|
3 |
+
pipeline = ["ner"]
|
4 |
+
|
5 |
+
[nlp.tokenizer]
|
6 |
+
@tokenizers = "eds.tokenizer"
|
7 |
+
|
8 |
+
[components]
|
9 |
+
|
10 |
+
[components.ner]
|
11 |
+
@factory = "eds.ner_crf"
|
12 |
+
target_span_getter = "gold_spans"
|
13 |
+
labels = ["anatomie", "date", "dose", "duree", "examen", "frequence", "mode", "moment", "pathologie", "sosy", "substance", "traitement", "valeur"]
|
14 |
+
infer_span_setter = true
|
15 |
+
mode = "joint"
|
16 |
+
window = 40
|
17 |
+
stride = 20
|
18 |
+
|
19 |
+
[components.ner.embedding]
|
20 |
+
@factory = "eds.text_cnn"
|
21 |
+
kernel_sizes = [3]
|
22 |
+
|
23 |
+
[components.ner.embedding.embedding]
|
24 |
+
@factory = "eds.transformer"
|
25 |
+
model = "./ner/embedding/embedding"
|
26 |
+
window = 128
|
27 |
+
stride = 96
|
28 |
+
|
29 |
+
[components.ner.span_setter]
|
30 |
+
ents = true
|
31 |
+
* = true
|
32 |
+
gold_spans = ["anatomie", "date", "dose", "duree", "examen", "frequence", "mode", "moment", "pathologie", "sosy", "substance", "traitement", "valeur"]
|
33 |
+
|
dummy_ner/artifacts/meta.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
|
3 |
+
}
|
dummy_ner/artifacts/ner/embedding/embedding/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "./ner/embedding/embedding",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 128,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 512,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 2,
|
17 |
+
"num_hidden_layers": 2,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.40.1",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
dummy_ner/artifacts/ner/embedding/embedding/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ead96338fa7a78a05b3f0cb78763d825f9144b6ae6ff346092261e8d3c3ef046
|
3 |
+
size 17547912
|
dummy_ner/artifacts/ner/embedding/embedding/parameters.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9ad71db7a9edf51b3a873b80cab6a1af6f1ba026021af76bbee919c451ca5a4
|
3 |
+
size 8992
|
dummy_ner/artifacts/ner/embedding/embedding/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
dummy_ner/artifacts/ner/embedding/embedding/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dummy_ner/artifacts/ner/embedding/embedding/tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 1000000000000000019884624838656,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
dummy_ner/artifacts/ner/embedding/embedding/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dummy_ner/artifacts/ner/embedding/parameters.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4653e3d2bda5ef27335411413e121eefdd4ceb7aed323f3964e9ad43cbab4d8d
|
3 |
+
size 263528
|
dummy_ner/artifacts/ner/parameters.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e5a517b89e44c372a89938808359ff8113651c41e383d10d88a6fd01d80aea7
|
3 |
+
size 34363
|
dummy_ner/artifacts/tokenizer
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
��prefix_search��suffix_search��infix_finditer��token_match��url_match��exceptions��faster_heuristics�
|
pyproject.toml
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = [ "poetry-core>=1.0.0", "pypandoc<1.8.0",]
|
3 |
+
build-backend = "poetry.core.masonry.api"
|
4 |
+
|
5 |
+
[tool.edsnlp]
|
6 |
+
model_name = "dummy-ner"
|
7 |
+
|
8 |
+
[tool.poetry]
|
9 |
+
name = "dummy-ner"
|
10 |
+
version = "0.1.0"
|
11 |
+
description = ""
|
12 |
+
authors = [ "Perceval Wajsburt <[email protected]>",]
|
13 |
+
readme = "README.md"
|
14 |
+
include = [ "dummy_ner/artifacts/**",]
|
15 |
+
[[tool.poetry.packages]]
|
16 |
+
include = "dummy_ner"
|
17 |
+
|
18 |
+
[tool.interrogate]
|
19 |
+
ignore-init-method = true
|
20 |
+
ignore-init-module = true
|
21 |
+
ignore-magic = false
|
22 |
+
ignore-semiprivate = false
|
23 |
+
ignore-private = false
|
24 |
+
ignore-property-decorators = false
|
25 |
+
ignore-module = true
|
26 |
+
ignore-nested-functions = false
|
27 |
+
ignore-nested-classes = true
|
28 |
+
ignore-setters = false
|
29 |
+
fail-under = 10
|
30 |
+
exclude = [ "docs", "build", "tests",]
|
31 |
+
verbose = 0
|
32 |
+
quiet = false
|
33 |
+
whitelist-regex = []
|
34 |
+
color = true
|
35 |
+
omit-covered-files = false
|
36 |
+
|
37 |
+
[tool.mypy]
|
38 |
+
plugins = "pydantic.mypy"
|
39 |
+
|
40 |
+
[tool.ruff]
|
41 |
+
fix = true
|
42 |
+
extend-exclude = [ ".git", "__pycache__", "__init__.py", ".mypy_cache", ".pytest_cache", ".venv", "build",]
|
43 |
+
line-length = 88
|
44 |
+
select = [ "E", "F", "W", "I001",]
|
45 |
+
|
46 |
+
[tool.poetry.dependencies]
|
47 |
+
python = ">=3.7.1,<4.0"
|
48 |
+
sentencepiece = "^0.1.96"
|
49 |
+
|
50 |
+
[tool.pytest.ini_options]
|
51 |
+
testpaths = [ "tests",]
|
52 |
+
|
53 |
+
[tool.ruff.flake8-tidy-imports]
|
54 |
+
ban-relative-imports = "parents"
|
55 |
+
|
56 |
+
[tool.ruff.extend-per-file-ignores]
|
57 |
+
"__init__.py" = [ "F401",]
|
58 |
+
|
59 |
+
[tool.ruff.isort]
|
60 |
+
known-first-party = [ "edsnlp",]
|
61 |
+
known-third-party = [ "build",]
|
62 |
+
order-by-type = true
|
63 |
+
|
64 |
+
[tool.coverage.report]
|
65 |
+
include = [ "quaero_ner/*", "scripts/*",]
|
66 |
+
omit = [ "tests/*",]
|
67 |
+
exclude_lines = [ "def __repr__", "if __name__ == .__main__.:", "@overload", "pragma: no cover", "raise .*Error", "raise .*Exception", "warn\\(", "if __name__ == .__main__.:", "if TYPE_CHECKING:", "class .*\\bProtocol\\):", "@(abc\\.)?abstractmethod", "Span.set_extension.*", "Doc.set_extension.*", "Token.set_extension.*",]
|
68 |
+
|
69 |
+
[tool.poetry.dependencies.edsnlp]
|
70 |
+
version = ">=0.11.2"
|
71 |
+
extras = [ "ml",]
|
72 |
+
|
73 |
+
[tool.poetry.group.docs]
|
74 |
+
optional = true
|
75 |
+
|
76 |
+
[tool.poetry.group.docs.dependencies]
|