Spaces:
Sleeping
Sleeping
Merge branch 'eason/main' into eason/fix_term_check
Browse files- dict_util.py +46 -0
- finetune_data/dict_enzh.csv +7 -2
- finetune_data/dict_freq.txt +17 -1
dict_util.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import pickle
|
3 |
+
|
4 |
+
### NOTICE: each CSV row must have exactly two columns, but the same key may appear on multiple rows.
|
5 |
+
|
6 |
+
|
7 |
+
# 1_2_3, 1 is action, 2 is supply object, 3 is accept object
|
8 |
+
def update_dict_csv(term_dict, f):
    """Merge two-column CSV rows read from ``f`` into ``term_dict`` in place.

    Each row is interpreted as ``term, translation``. A term may appear on
    several rows; every distinct translation is collected, in first-seen
    order, in the list stored under that term. Columns beyond the second
    are ignored.

    Rows with fewer than two columns (blank lines, or a term with no
    translation) are skipped instead of raising ``IndexError``.

    Args:
        term_dict: dict mapping a term to a list of translations; mutated.
        f: iterable of CSV lines, e.g. a file opened in text mode.
    """
    for row in csv.reader(f):
        # csv.reader yields [] for a blank line and a 1-item list for a
        # line without a comma; neither carries a usable pair.
        if len(row) < 2:
            continue
        term, translation = row[0], row[1]
        if term not in term_dict:
            term_dict[term] = [translation]
        elif translation not in term_dict[term]:
            # Rebind to a new list rather than appending, so a list object
            # shared with another holder is never mutated behind its back.
            term_dict[term] = term_dict[term] + [translation]
|
16 |
+
|
17 |
+
def export_dict_csv(term_dict, f):
    """Write ``term_dict`` to ``f`` as two-column ``term,translation`` rows.

    One row is written per translation, so a term with several
    translations appears on several rows. This is the same layout that
    ``update_dict_csv`` reads, which makes the export re-importable
    (writing the list itself would emit its Python repr, e.g.
    ``zerg,['虫族']``). A term whose translation list is empty produces no
    row.

    Args:
        term_dict: dict mapping a term to a list of translations. A bare
            string value is treated as a single translation.
        f: writable text file object.
    """
    # Build the writer once instead of once per row.
    writer = csv.writer(f)
    for term, translations in term_dict.items():
        if isinstance(translations, str):
            # Guard against iterating a plain string character by character.
            translations = [translations]
        for translation in translations:
            writer.writerow([term, translation])
|
21 |
+
|
22 |
+
def save_dict_pickle(term_dict, f):
    """Serialize ``term_dict`` into the binary file object ``f``.

    The highest pickle protocol supported by the running interpreter is
    used.

    Args:
        term_dict: object to pickle (a term dictionary in this module).
        f: file object opened for binary writing.
    """
    pickler = pickle.Pickler(f, protocol=pickle.HIGHEST_PROTOCOL)
    pickler.dump(term_dict)
|
24 |
+
|
25 |
+
def update_csv_pickle(pickle_f, csv_f):
    """Merge two-column CSV rows into a pickled term dictionary, in place.

    The dictionary is loaded from ``pickle_f``, every ``term, translation``
    row from ``csv_f`` is merged into it (distinct translations are
    collected per term), and the result is written back to ``pickle_f``.

    The stream is rewound before writing and truncated afterwards, so the
    file ends up holding only the updated dictionary. Without the rewind
    the new pickle would be appended after the old one, and a later
    ``pickle.load`` from the start would still return the stale data.

    Rows with fewer than two columns (blank lines, or a term with no
    translation) are skipped instead of raising ``IndexError``.

    Args:
        pickle_f: binary file object open for both reading and writing
            (e.g. mode ``"r+b"``) that contains a pickled dict.
        csv_f: iterable of CSV lines, e.g. a file opened in text mode.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in
    # the stream; only use this on pickle files from a trusted source.
    term_dict = pickle.load(pickle_f)

    for row in csv.reader(csv_f):
        if len(row) < 2:
            continue
        term, translation = row[0], row[1]
        if term not in term_dict:
            term_dict[term] = [translation]
        elif translation not in term_dict[term]:
            term_dict[term] = term_dict[term] + [translation]

    # Overwrite the old pickle rather than appending after it. Streams that
    # cannot seek keep the previous write-at-current-position behavior.
    rewound = pickle_f.seekable()
    if rewound:
        pickle_f.seek(0)
    # Save with the highest protocol for better performance.
    pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)
    if rewound:
        # Drop any leftover bytes from a longer previous pickle.
        pickle_f.truncate()
|
35 |
+
|
36 |
+
|
37 |
+
#demo
# Build the term dictionary from the bundled CSV, then write it back out
# to a scratch CSV. This runs at import time and leaves `term_dict_sc2`
# available as a module-level name.
term_dict_sc2 = {}
# newline="" is what the csv module documents for files handed to
# csv.reader / csv.writer: it lets the csv module handle line endings
# itself (no doubled "\r" on Windows, quoted embedded newlines preserved).
with open("./finetune_data/dict_enzh.csv", "r", encoding="utf-8", newline="") as f:
    update_dict_csv(term_dict_sc2, f)

with open("../test.csv", "w", encoding="utf-8", newline="") as w:
    export_dict_csv(term_dict_sc2, w)
|
44 |
+
|
45 |
+
## for load pickle, just:
|
46 |
+
# pickle.load(f)
|
finetune_data/dict_enzh.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
zerg,虫族
|
3 |
protoss,神族
|
4 |
terran,人族
|
@@ -7,6 +7,7 @@ forge,锻炉
|
|
7 |
blink,闪现
|
8 |
evolution chamber,进化腔
|
9 |
cybernetics core,控制芯核
|
|
|
10 |
enhanced shockwaves,EMP范围
|
11 |
gravitic boosters,ob速度
|
12 |
armory,军械库
|
@@ -36,6 +37,7 @@ planetary fortress,大地堡
|
|
36 |
battle cruiser,大和
|
37 |
weapon refit,大和炮
|
38 |
brood lord,大龙
|
|
|
39 |
greater spire,大龙塔
|
40 |
anabolic synthesis,大牛速度
|
41 |
cyclone,导弹车
|
@@ -80,7 +82,7 @@ spine crawler,管子
|
|
80 |
marauder,光头
|
81 |
ghost,鬼兵
|
82 |
arm silo with nuke,核弹
|
83 |
-
carrier
|
84 |
hellion,火车
|
85 |
hellbat,火车侠
|
86 |
ravager,火蟑螂
|
@@ -175,3 +177,6 @@ glial reconstitution,蟑螂速度
|
|
175 |
concussive shells,震撼弹
|
176 |
stalker,追猎
|
177 |
disruptor,自爆球
|
|
|
|
|
|
|
|
1 |
+
barracks,兵营
|
2 |
zerg,虫族
|
3 |
protoss,神族
|
4 |
terran,人族
|
|
|
7 |
blink,闪现
|
8 |
evolution chamber,进化腔
|
9 |
cybernetics core,控制芯核
|
10 |
+
cybercore,控制芯核
|
11 |
enhanced shockwaves,EMP范围
|
12 |
gravitic boosters,ob速度
|
13 |
armory,军械库
|
|
|
37 |
battle cruiser,大和
|
38 |
weapon refit,大和炮
|
39 |
brood lord,大龙
|
40 |
+
broodling,巢虫
|
41 |
greater spire,大龙塔
|
42 |
anabolic synthesis,大牛速度
|
43 |
cyclone,导弹车
|
|
|
82 |
marauder,光头
|
83 |
ghost,鬼兵
|
84 |
arm silo with nuke,核弹
|
85 |
+
carrier,航母
|
86 |
hellion,火车
|
87 |
hellbat,火车侠
|
88 |
ravager,火蟑螂
|
|
|
177 |
concussive shells,震撼弹
|
178 |
stalker,追猎
|
179 |
disruptor,自爆球
|
180 |
+
zerg,虫族
|
181 |
+
protross,神族
|
182 |
+
terran,人族
|
finetune_data/dict_freq.txt
CHANGED
@@ -59,6 +59,7 @@ supply depot
|
|
59 |
overlord
|
60 |
pneumatized carapace
|
61 |
mutalisk
|
|
|
62 |
spire
|
63 |
viper
|
64 |
flyer attacks
|
@@ -174,4 +175,19 @@ disruptor
|
|
174 |
zerg
|
175 |
protross
|
176 |
terran
|
177 |
-
starcraft
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
overlord
|
60 |
pneumatized carapace
|
61 |
mutalisk
|
62 |
+
broodling
|
63 |
spire
|
64 |
viper
|
65 |
flyer attacks
|
|
|
175 |
zerg
|
176 |
protross
|
177 |
terran
|
178 |
+
starcraft
|
179 |
+
TvT
|
180 |
+
Maxpax
|
181 |
+
showtime
|
182 |
+
PvP
|
183 |
+
ZvZ
|
184 |
+
TvZ
|
185 |
+
TvP
|
186 |
+
ZvP
|
187 |
+
PvZ
|
188 |
+
PvT
|
189 |
+
ZvT
|
190 |
+
Florencio
|
191 |
+
cybercore
|
192 |
+
nest
|
193 |
+
follow-up
|