jdoerfler committed on
Commit
0d8ee8f
·
verified ·
1 Parent(s): 1e31153

Upload 15 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ id_dep_web_sm/parser/model filter=lfs diff=lfs merge=lfs -text
37
+ id_dep_web_sm/tagger/model filter=lfs diff=lfs merge=lfs -text
38
+ id_dep_web_sm/tok2vec/model filter=lfs diff=lfs merge=lfs -text
id_dep_web_sm/config.cfg ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = null
3
+ dev = null
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ seed = 0
9
+ gpu_allocator = null
10
+
11
+ [nlp]
12
+ lang = "id"
13
+ pipeline = ["tok2vec","parser","tagger"]
14
+ disabled = []
15
+ before_creation = null
16
+ after_creation = null
17
+ after_pipeline_creation = null
18
+ batch_size = 1000
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
+
22
+ [components]
23
+
24
+ [components.parser]
25
+ factory = "parser"
26
+ learn_tokens = false
27
+ min_action_freq = 30
28
+ moves = null
29
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
30
+ update_with_oracle_cut_size = 100
31
+
32
+ [components.parser.model]
33
+ @architectures = "spacy.TransitionBasedParser.v2"
34
+ state_type = "parser"
35
+ extra_state_tokens = false
36
+ hidden_width = 64
37
+ maxout_pieces = 2
38
+ use_upper = true
39
+ nO = null
40
+
41
+ [components.parser.model.tok2vec]
42
+ @architectures = "spacy.HashEmbedCNN.v2"
43
+ pretrained_vectors = null
44
+ width = 96
45
+ depth = 4
46
+ embed_size = 2000
47
+ window_size = 1
48
+ maxout_pieces = 3
49
+ subword_features = true
50
+
51
+ [components.tagger]
52
+ factory = "tagger"
53
+ label_smoothing = 0.0
54
+ neg_prefix = "!"
55
+ overwrite = false
56
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
57
+
58
+ [components.tagger.model]
59
+ @architectures = "spacy.Tagger.v2"
60
+ nO = null
61
+ normalize = false
62
+
63
+ [components.tagger.model.tok2vec]
64
+ @architectures = "spacy.HashEmbedCNN.v2"
65
+ pretrained_vectors = null
66
+ width = 96
67
+ depth = 4
68
+ embed_size = 2000
69
+ window_size = 1
70
+ maxout_pieces = 3
71
+ subword_features = true
72
+
73
+ [components.tok2vec]
74
+ factory = "tok2vec"
75
+
76
+ [components.tok2vec.model]
77
+ @architectures = "spacy.HashEmbedCNN.v2"
78
+ pretrained_vectors = null
79
+ width = 96
80
+ depth = 4
81
+ embed_size = 2000
82
+ window_size = 1
83
+ maxout_pieces = 3
84
+ subword_features = true
85
+
86
+ [corpora]
87
+
88
+ [corpora.dev]
89
+ @readers = "spacy.Corpus.v1"
90
+ path = ${paths.dev}
91
+ gold_preproc = false
92
+ max_length = 0
93
+ limit = 0
94
+ augmenter = null
95
+
96
+ [corpora.train]
97
+ @readers = "spacy.Corpus.v1"
98
+ path = ${paths.train}
99
+ gold_preproc = false
100
+ max_length = 0
101
+ limit = 0
102
+ augmenter = null
103
+
104
+ [training]
105
+ seed = ${system.seed}
106
+ gpu_allocator = ${system.gpu_allocator}
107
+ dropout = 0.1
108
+ accumulate_gradient = 1
109
+ patience = 1600
110
+ max_epochs = 0
111
+ max_steps = 20000
112
+ eval_frequency = 200
113
+ frozen_components = []
114
+ annotating_components = []
115
+ dev_corpus = "corpora.dev"
116
+ train_corpus = "corpora.train"
117
+ before_to_disk = null
118
+ before_update = null
119
+
120
+ [training.batcher]
121
+ @batchers = "spacy.batch_by_words.v1"
122
+ discard_oversize = false
123
+ tolerance = 0.2
124
+ get_length = null
125
+
126
+ [training.batcher.size]
127
+ @schedules = "compounding.v1"
128
+ start = 100
129
+ stop = 1000
130
+ compound = 1.001
131
+ t = 0.0
132
+
133
+ [training.logger]
134
+ @loggers = "spacy.ConsoleLogger.v1"
135
+ progress_bar = false
136
+
137
+ [training.optimizer]
138
+ @optimizers = "Adam.v1"
139
+ beta1 = 0.9
140
+ beta2 = 0.999
141
+ L2_is_weight_decay = true
142
+ L2 = 0.01
143
+ grad_clip = 1.0
144
+ use_averages = false
145
+ eps = 0.00000001
146
+ learn_rate = 0.001
147
+
148
+ [training.score_weights]
149
+ dep_uas = 0.25
150
+ dep_las = 0.25
151
+ dep_las_per_type = null
152
+ sents_p = null
153
+ sents_r = null
154
+ sents_f = 0.0
155
+ tag_acc = 0.5
156
+
157
+ [pretraining]
158
+
159
+ [initialize]
160
+ vectors = ${paths.vectors}
161
+ init_tok2vec = ${paths.init_tok2vec}
162
+ vocab_data = null
163
+ lookups = null
164
+ before_init = null
165
+ after_init = null
166
+
167
+ [initialize.components]
168
+
169
+ [initialize.tokenizer]
id_dep_web_sm/meta.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"id",
3
+ "name":"pipeline",
4
+ "version":"0.0.0",
5
+ "spacy_version":">=3.8.3,<3.9.0",
6
+ "description":"",
7
+ "author":"",
8
+ "email":"",
9
+ "url":"",
10
+ "license":"",
11
+ "spacy_git_version":"be0fa81",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null,
17
+ "mode":"default"
18
+ },
19
+ "labels":{
20
+ "tok2vec":[
21
+
22
+ ],
23
+ "parser":[
24
+ "ROOT",
25
+ "acl",
26
+ "acl:relcl",
27
+ "advcl",
28
+ "advmod",
29
+ "advmod:emph",
30
+ "amod",
31
+ "appos",
32
+ "aux",
33
+ "case",
34
+ "case:adv",
35
+ "cc",
36
+ "cc:preconj",
37
+ "ccomp",
38
+ "clf",
39
+ "compound",
40
+ "compound:a",
41
+ "conj",
42
+ "cop",
43
+ "csubj",
44
+ "csubj:pass",
45
+ "dep",
46
+ "det",
47
+ "discourse",
48
+ "fixed",
49
+ "flat",
50
+ "flat:foreign",
51
+ "flat:name",
52
+ "goeswith",
53
+ "iobj",
54
+ "list",
55
+ "mark",
56
+ "nmod",
57
+ "nmod:lmod",
58
+ "nmod:poss",
59
+ "nmod:tmod",
60
+ "nsubj",
61
+ "nsubj:pass",
62
+ "nummod",
63
+ "obj",
64
+ "obl",
65
+ "obl:agent",
66
+ "obl:tmod",
67
+ "orphan",
68
+ "parataxis",
69
+ "punct",
70
+ "vocative",
71
+ "xcomp"
72
+ ],
73
+ "tagger":[
74
+ "PROPN",
75
+ "AUX",
76
+ "DET",
77
+ "NOUN",
78
+ "PRON",
79
+ "VERB",
80
+ "ADP",
81
+ "PUNCT",
82
+ "ADV",
83
+ "CCONJ",
84
+ "SCONJ",
85
+ "NUM",
86
+ "ADJ",
87
+ "PART",
88
+ "SYM",
89
+ "INTJ",
90
+ "X"
91
+ ]
92
+ },
93
+ "pipeline":[
94
+ "tok2vec",
95
+ "parser",
96
+ "tagger"
97
+ ],
98
+ "components":[
99
+ "tok2vec",
100
+ "parser",
101
+ "tagger"
102
+ ],
103
+ "disabled":[
104
+
105
+ ]
106
+ }
id_dep_web_sm/parser/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":30,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
id_dep_web_sm/parser/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d0d9591e65e456f4416183abafe94ceb8a38f3343c9f22ae755f30a82566f60
3
+ size 4060164
id_dep_web_sm/parser/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves� E{"0":{"nsubj":-2,"cop":-3,"det":-4,"ROOT":-5,"nsubj:pass":-6,"acl:relcl":-7,"case":-8,"obl":-9,"nmod":-10,"punct":-11,"appos":-12,"amod":-13,"compound":-14,"advmod":-15,"cc":-16,"mark":-17,"acl":-18,"obj":-19,"conj":-20,"nmod:poss":-21,"advcl":-22,"flat:name":-23,"nmod:tmod":-24,"nummod":-25,"aux":-26,"dep":-27,"xcomp":-28,"advmod:emph":-29,"ccomp":-30,"parataxis":-31,"obl:tmod":-32,"clf":-33,"flat":-34,"fixed":-35,"discourse":-36,"vocative":-37,"obl:agent":-38,"case:adv":-39,"nmod:lmod":-40,"compound:a":-41,"goeswith":-42,"orphan":-43,"csubj":-44,"iobj":-45,"list":-46,"csubj:pass":-47,"cc:preconj":-48,"flat:foreign":-49},"1":{"nsubj":-2,"cop":-3,"det":-4,"ROOT":-5,"nsubj:pass":-6,"acl:relcl":-7,"case":-8,"obl":-9,"nmod":-10,"punct":-11,"appos":-12,"amod":-13,"compound":-14,"advmod":-15,"cc":-16,"mark":-17,"acl":-18,"obj":-19,"conj":-20,"nmod:poss":-21,"advcl":-22,"flat:name":-23,"nmod:tmod":-24,"nummod":-25,"aux":-26,"dep":-27,"xcomp":-28,"advmod:emph":-29,"ccomp":-30,"parataxis":-31,"obl:tmod":-32,"clf":-33,"flat":-34,"fixed":-35,"discourse":-36,"vocative":-37,"obl:agent":-38,"case:adv":-39,"nmod:lmod":-40,"compound:a":-41,"goeswith":-42,"orphan":-43,"csubj":-44,"iobj":-45,"list":-46,"csubj:pass":-47,"cc:preconj":-48,"flat:foreign":-49},"2":{"dep":0,"nsubj":-2,"cop":-3,"det":-4,"ROOT":-5,"nsubj:pass":-6,"acl:relcl":-7,"case":-8,"obl":-9,"nmod":-10,"punct":-11,"appos":-12,"amod":-13,"compound":-14,"advmod":-15,"cc":-16,"mark":-17,"acl":-18,"obj":-19,"conj":-20,"nmod:poss":-21,"advcl":-22,"flat:name":-23,"nmod:tmod":-24,"nummod":-25,"aux":-26,"xcomp":-28,"advmod:emph":-29,"ccomp":-30,"parataxis":-31,"obl:tmod":-32,"clf":-33,"flat":-34,"fixed":-35,"discourse":-36,"vocative":-37,"obl:agent":-38,"case:adv":-39,"nmod:lmod":-40,"compound:a":-41,"goeswith":-42,"orphan":-43,"csubj":-44,"iobj":-45,"list":-46,"csubj:pass":-47,"cc:preconj":-48,"flat:foreign":-49},"3":{"dep":0,"nsubj":-2,"cop":-3,"det":-4,"ROOT":-5,"nsubj:pass":-6,"acl:relcl":-7,"case":-8,"obl":-9,
"nmod":-10,"punct":-11,"appos":-12,"amod":-13,"compound":-14,"advmod":-15,"cc":-16,"mark":-17,"acl":-18,"obj":-19,"conj":-20,"nmod:poss":-21,"advcl":-22,"flat:name":-23,"nmod:tmod":-24,"nummod":-25,"aux":-26,"xcomp":-28,"advmod:emph":-29,"ccomp":-30,"parataxis":-31,"obl:tmod":-32,"clf":-33,"flat":-34,"fixed":-35,"discourse":-36,"vocative":-37,"obl:agent":-38,"case:adv":-39,"nmod:lmod":-40,"compound:a":-41,"goeswith":-42,"orphan":-43,"csubj":-44,"iobj":-45,"list":-46,"csubj:pass":-47,"cc:preconj":-48,"flat:foreign":-49},"4":{"ROOT":0,"nsubj":-2,"cop":-3,"det":-4,"nsubj:pass":-6,"acl:relcl":-7,"case":-8,"obl":-9,"nmod":-10,"punct":-11,"appos":-12,"amod":-13,"compound":-14,"advmod":-15,"cc":-16,"mark":-17,"acl":-18,"obj":-19,"conj":-20,"nmod:poss":-21,"advcl":-22,"flat:name":-23,"nmod:tmod":-24,"nummod":-25,"aux":-26,"dep":-27,"xcomp":-28,"advmod:emph":-29,"ccomp":-30,"parataxis":-31,"obl:tmod":-32,"clf":-33,"flat":-34,"fixed":-35,"discourse":-36,"vocative":-37,"obl:agent":-38,"case:adv":-39,"nmod:lmod":-40,"compound:a":-41,"goeswith":-42,"orphan":-43,"csubj":-44,"iobj":-45,"list":-46,"csubj:pass":-47,"cc:preconj":-48,"flat:foreign":-49}}�cfg��neg_key�
id_dep_web_sm/tagger/cfg ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_smoothing":0.0,
3
+ "labels":[
4
+ "PROPN",
5
+ "AUX",
6
+ "DET",
7
+ "NOUN",
8
+ "PRON",
9
+ "VERB",
10
+ "ADP",
11
+ "PUNCT",
12
+ "ADV",
13
+ "CCONJ",
14
+ "SCONJ",
15
+ "NUM",
16
+ "ADJ",
17
+ "PART",
18
+ "SYM",
19
+ "INTJ",
20
+ "X"
21
+ ],
22
+ "neg_prefix":"!",
23
+ "overwrite":false
24
+ }
id_dep_web_sm/tagger/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574edacde8d90c2a76babadc1257b0fd347155ad0a3079f90cd1509d9b388b68
3
+ size 3712437
id_dep_web_sm/tok2vec/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
id_dep_web_sm/tok2vec/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8459bda201c5daf60072489266a53b70f5c3a0b6bdfd3fab5d622a443b8754b
3
+ size 3705091
id_dep_web_sm/tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
id_dep_web_sm/vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
id_dep_web_sm/vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
id_dep_web_sm/vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
id_dep_web_sm/vocab/vectors ADDED
Binary file (128 Bytes). View file
 
id_dep_web_sm/vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }