Yuhan-Lu committed on
Commit
ac6e110
·
1 Parent(s): 3e39830

update logic of individual translation

Browse files

The current logic would skip the unmatched inner method of set_translation in the SRT class.


Former-commit-id: 84535d2adcec9c824849f9606b628f663af8c2a4

Files changed (1) hide show
  1. pipeline.py +12 -8
pipeline.py CHANGED
@@ -24,7 +24,7 @@ def parse_args():
24
  parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
25
  parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
26
  parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
27
- parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo") # default change to gpt-4
28
  parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
29
  parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
30
  parser.add_argument("-v", help="auto encode script with video", action='store_true')
@@ -192,10 +192,10 @@ def check_translation(sentence, translation):
192
  """
193
  sentence_count = sentence.count('\n\n') + 1
194
  translation_count = translation.count('\n\n') + 1
195
- print("sentence length: ", len(sentence), sentence_count)
196
- print("translation length: ", len(translation), translation_count)
197
 
198
  if sentence_count != translation_count:
 
 
199
  return False
200
  else:
201
  return True
@@ -257,12 +257,16 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
257
  # if failure still happen, split into smaller tokens
258
  if attempt_left == 0:
259
  single_sentences = sentence.split("\n\n")
260
- print("merge sentence issue found: ", len(single_sentences), len(translate), single_sentences, translate)
261
  translate = ""
262
- for single_sentence in single_sentences:
263
- translate += get_response(model_name, single_sentence) + "\n\n"
264
- print("after correction: ", "chinese length: ", len(translate), translate)
265
- # print(type(translate))
 
 
 
 
266
  except Exception as e:
267
  logging.debug("An error has occurred during translation:",e)
268
  print("An error has occurred during translation:",e)
 
24
  parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
25
  parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
26
  parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
27
+ parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-4") # default change to gpt-4
28
  parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
29
  parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
30
  parser.add_argument("-v", help="auto encode script with video", action='store_true')
 
192
  """
193
  sentence_count = sentence.count('\n\n') + 1
194
  translation_count = translation.count('\n\n') + 1
 
 
195
 
196
  if sentence_count != translation_count:
197
+ # print("sentence length: ", len(sentence), sentence_count)
198
+ # print("translation length: ", len(translation), translation_count)
199
  return False
200
  else:
201
  return True
 
257
  # if failure still happen, split into smaller tokens
258
  if attempt_left == 0:
259
  single_sentences = sentence.split("\n\n")
260
+ print("merge sentence issue found for range", range)
261
  translate = ""
262
+ for i, single_sentence in enumerate(single_sentences):
263
+ if i == len(single_sentences) - 1:
264
+ translate += get_response(model_name, single_sentence)
265
+ else:
266
+ translate += get_response(model_name, single_sentence) + "\n\n"
267
+ # print(single_sentence, translate.split("\n\n")[-2])
268
+ print("solved by individually translation!")
269
+
270
  except Exception as e:
271
  logging.debug("An error has occurred during translation:",e)
272
  print("An error has occurred during translation:",e)