Eason Lu committed on
Commit
01a25ac
·
2 Parent(s): fdce050 4128cae

Merge branch 'eason/refactor' of https://github.com/project-kxkg/project-t into eason/refactor

Browse files
Files changed (1) hide show
  1. SRT.py +31 -3
SRT.py CHANGED
@@ -1,6 +1,8 @@
1
  from datetime import timedelta
2
  import os
3
  import whisper
 
 
4
 
5
  class SRT_segment(object):
6
  def __init__(self, *args) -> None:
@@ -154,9 +156,35 @@ class SRT_script():
154
  f.write(self.form_bilingual_str())
155
  pass
156
 
157
- def correct_with_force_term():
158
- # force term correction
159
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  pass
161
 
162
 
 
1
  from datetime import timedelta
2
  import os
3
  import whisper
4
+ from csv import reader
5
+ import re
6
 
7
  class SRT_segment(object):
8
  def __init__(self, *args) -> None:
 
156
  f.write(self.form_bilingual_str())
157
  pass
158
 
159
+ def correct_with_force_term(self):
160
+ ## force term correction
161
+ # TODO: shortcut translation i.e. VA, ob
162
+ # TODO: variety of translation
163
+
164
+ # load term dictionary
165
+ with open("finetune_data/dict.csv",'r', encoding='utf-8') as f:
166
+ csv_reader = reader(f)
167
+ term_dict = {rows[0]:rows[1] for rows in csv_reader}
168
+
169
+ # change term
170
+ for seg in self.segments:
171
+ ready_words = re.sub('\n', '\n ', seg.source_text).split(" ")
172
+ for i in range(len(ready_words)):
173
+ word = ready_words[i]
174
+ if word[-2:] == ".\n" :
175
+ if word[:-2].lower() in term_dict :
176
+ new_word = word.replace(word[:-2], term_dict.get(word[:-2].lower())) + ' '
177
+ ready_words[i] = new_word
178
+ else:
179
+ ready_words[i] = word + ' '
180
+ elif word.lower() in term_dict :
181
+ new_word = word.replace(word,term_dict.get(word.lower())) + ' '
182
+ ready_words[i] = new_word
183
+ else :
184
+ ready_words[i]= word + ' '
185
+ seg.source_text = re.sub('\n ', '\n', "".join(ready_words))
186
+
187
+ print(self)
188
  pass
189
 
190