add tokenizers and dict files
Browse files
dicts/dict.transformer.base.en.const.txt
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
</NP> 834587
|
2 |
+
<NP> 834587
|
3 |
+
NN 655960
|
4 |
+
IN 559326
|
5 |
+
DT 458456
|
6 |
+
</VP> 450160
|
7 |
+
<VP> 450160
|
8 |
+
NNP 405496
|
9 |
+
</S> 300422
|
10 |
+
<S> 300422
|
11 |
+
JJ 292882
|
12 |
+
NNS 272432
|
13 |
+
. 251036
|
14 |
+
XX 250846
|
15 |
+
</PP> 243084
|
16 |
+
<PP> 243084
|
17 |
+
, 230006
|
18 |
+
PRP 225438
|
19 |
+
RB 224084
|
20 |
+
VB 181970
|
21 |
+
VBD 169356
|
22 |
+
CC 154986
|
23 |
+
</TOP> 141871
|
24 |
+
<TOP> 141871
|
25 |
+
VBZ 124814
|
26 |
+
VBP 107908
|
27 |
+
VBN 104034
|
28 |
+
CD 101496
|
29 |
+
</SBAR> 85487
|
30 |
+
<SBAR> 85487
|
31 |
+
VBG 85382
|
32 |
+
TO 76424
|
33 |
+
</ADVP> 73664
|
34 |
+
<ADVP> 73664
|
35 |
+
MD 63004
|
36 |
+
PRP$ 57414
|
37 |
+
</ADJP> 45325
|
38 |
+
<ADJP> 45325
|
39 |
+
HYPH 35952
|
40 |
+
</NML> 34655
|
41 |
+
<NML> 34655
|
42 |
+
UH 34140
|
43 |
+
POS 33028
|
44 |
+
</WHNP> 25806
|
45 |
+
<WHNP> 25806
|
46 |
+
WP 25254
|
47 |
+
WDT 24226
|
48 |
+
'' 23860
|
49 |
+
`` 22524
|
50 |
+
: 21704
|
51 |
+
RP 20390
|
52 |
+
WRB 20208
|
53 |
+
</INTJ> 16172
|
54 |
+
<INTJ> 16172
|
55 |
+
" 15072
|
56 |
+
JJR 13996
|
57 |
+
NNPS 13962
|
58 |
+
</QP> 11774
|
59 |
+
<QP> 11774
|
60 |
+
VERB 10126
|
61 |
+
</PRT> 9713
|
62 |
+
<PRT> 9713
|
63 |
+
</WHADVP> 9662
|
64 |
+
<WHADVP> 9662
|
65 |
+
$ 9022
|
66 |
+
EX 8722
|
67 |
+
JJS 7716
|
68 |
+
RBR 7040
|
69 |
+
) 6862
|
70 |
+
</EDITED> 6841
|
71 |
+
<EDITED> 6841
|
72 |
+
( 6782
|
73 |
+
</SQ> 6255
|
74 |
+
<SQ> 6255
|
75 |
+
</FRAG> 5225
|
76 |
+
<FRAG> 5225
|
77 |
+
PDT 5088
|
78 |
+
</PRN> 3999
|
79 |
+
<PRN> 3999
|
80 |
+
</SINV> 3317
|
81 |
+
<SINV> 3317
|
82 |
+
</SBARQ> 3305
|
83 |
+
<SBARQ> 3305
|
84 |
+
RBS 2882
|
85 |
+
FW 1956
|
86 |
+
</UCP> 1534
|
87 |
+
<UCP> 1534
|
88 |
+
NFP 1348
|
89 |
+
</CONJP> 1103
|
90 |
+
<CONJP> 1103
|
91 |
+
SYM 1102
|
92 |
+
</WHPP> 1002
|
93 |
+
<WHPP> 1002
|
94 |
+
</X> 816
|
95 |
+
<X> 816
|
96 |
+
' 782
|
97 |
+
WP$ 750
|
98 |
+
</EMBED> 718
|
99 |
+
<EMBED> 718
|
100 |
+
LS 690
|
101 |
+
</WHADJP> 516
|
102 |
+
<WHADJP> 516
|
103 |
+
ADD 460
|
104 |
+
</LST> 356
|
105 |
+
<LST> 356
|
106 |
+
</META> 288
|
107 |
+
<META> 288
|
108 |
+
</RRC> 232
|
109 |
+
<RRC> 232
|
110 |
+
</NAC> 218
|
111 |
+
<NAC> 218
|
112 |
+
</NX> 74
|
113 |
+
<NX> 74
|
114 |
+
AFX 62
|
115 |
+
</XX> 35
|
116 |
+
<XX> 35
|
dicts/dict.transformer.base.en.mt.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dicts/dict.transformer.large.multi.mtpg.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizers/bpe32k.en/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizers/bpe32k.en/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizers/bpe32k.ko/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizers/bpe32k.ko/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|