Spaces:
Sleeping
Sleeping
Macrodove
committed on
Commit
·
0362dd5
1
Parent(s):
bd1cc4e
Added comment, changed output to arrays of SRTsegment
Browse files
Former-commit-id: 97fea915a8272bdfd5a632e672dfe8e9709c4188
- evaluation/alignment.py +39 -21
evaluation/alignment.py
CHANGED
@@ -3,69 +3,87 @@ import numpy as np
|
|
3 |
sys.path.append('../src')
|
4 |
from srt_util.srt import SrtScript
|
5 |
|
|
|
|
|
|
|
|
|
|
|
6 |
def procedure(anchor, subsec, S_arr, subidx):
|
7 |
cache_idx = 0
|
8 |
-
while subidx != cache_idx:
|
9 |
cache_idx = subidx
|
10 |
-
if
|
|
|
11 |
break
|
12 |
sub = subsec[subidx]
|
13 |
if anchor.end < sub.start:
|
14 |
continue
|
|
|
15 |
if (anchor.start <= sub.start) and (sub.end <= anchor.end) or anchor.end - sub.start > sub.end - anchor.start:
|
16 |
-
S_arr[-1] += sub
|
17 |
subidx += 1
|
18 |
-
return subidx - 1
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def alignment(pred_path, gt_path):
|
21 |
pred = SrtScript.parse_from_srt_file(pred_path).segments
|
22 |
gt = SrtScript.parse_from_srt_file(gt_path).segments
|
23 |
pred_arr, gt_arr = [], []
|
24 |
-
idx_p, idx_t = 0, 0
|
25 |
|
26 |
while idx_p < len(pred) or idx_t < len(gt):
|
|
|
27 |
ps = pred[idx_p] if idx_p < len(pred) else None
|
28 |
gs = gt[idx_t] if idx_t < len(gt) else None
|
29 |
-
|
30 |
if not ps:
|
31 |
-
|
|
|
32 |
pred_arr.append('')
|
33 |
idx_t += 1
|
34 |
continue
|
35 |
|
36 |
if not gs:
|
37 |
-
|
|
|
38 |
gt_arr.append('')
|
39 |
idx_p += 1
|
40 |
continue
|
41 |
|
42 |
ps_dur = ps.end - ps.start
|
43 |
gs_dur = gs.end - gs.start
|
44 |
-
|
|
|
45 |
if ps_dur <= gs_dur:
|
|
|
46 |
if ps.end < gs.start:
|
47 |
-
pred_arr.append(ps
|
48 |
-
gt_arr.append('')
|
49 |
-
idx_t -= 1
|
50 |
else:
|
51 |
-
gt_arr.append(gs
|
52 |
if gs.end >= ps.start:
|
53 |
-
pred_arr.append(ps
|
54 |
idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
|
55 |
-
else:
|
56 |
pred_arr.append('')
|
57 |
idx_p -= 1
|
58 |
else:
|
|
|
59 |
if gs.end < ps.start:
|
60 |
-
gt_arr.append(gs
|
61 |
-
pred_arr.append('')
|
62 |
-
idx_p -= 1
|
63 |
else:
|
64 |
-
pred_arr.append(ps
|
65 |
if ps.end >= gs.start:
|
66 |
-
gt_arr.append(gs
|
67 |
idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
|
68 |
-
else:
|
69 |
gt_arr.append('')
|
70 |
idx_t -= 1
|
71 |
|
|
|
3 |
sys.path.append('../src')
|
4 |
from srt_util.srt import SrtScript
|
5 |
|
6 |
+
|
7 |
+
# Helper method
|
8 |
+
# Align sub anchor segment pair via greedy approach
|
9 |
+
# Input: anchor segment, SRT segments, output array of sub, index of current sub
|
10 |
+
# Output: updated index of sub
|
11 |
def procedure(anchor, subsec, S_arr, subidx):
    """Greedily merge consecutive sub segments into the current aligned entry.

    Starting at ``subsec[subidx]``, every sub segment that qualifies is added
    onto ``S_arr[-1]`` with ``+=`` and the scan moves to the next one.  The
    scan stops at the first segment that does not qualify, or when
    ``subsec`` is exhausted.

    A sub segment qualifies when it does not start after ``anchor`` ends and
    either lies entirely inside ``anchor`` or satisfies
    ``anchor.end - sub.start > sub.end - anchor.start``.

    Args:
        anchor: Segment exposing ``start`` and ``end``.
        subsec: Sequence of segments exposing ``start`` and ``end``.
        S_arr: Output list; its last element is updated via ``+=``.
        subidx: Index in ``subsec`` of the first candidate to examine.

    Returns:
        Index of the last sub segment that was merged, or ``subidx - 1``
        when nothing was merged.
    """
    # Loop on the bounds directly instead of comparing against a
    # "previous index" sentinel initialised to 0: with the sentinel, a
    # start index of 0 skipped the scan entirely and returned -1.
    while subidx < len(subsec):
        sub = subsec[subidx]

        # The sub segment begins after the anchor is over: nothing to merge.
        if anchor.end < sub.start:
            break

        contained = (anchor.start <= sub.start) and (sub.end <= anchor.end)
        heavier_overlap = (anchor.end - sub.start) > (sub.end - anchor.start)
        if not (contained or heavier_overlap):
            # Alignment has stabilized; leave this sub segment for the caller.
            break

        S_arr[-1] += sub  # use sub.source_text here for string-only output
        subidx += 1

    # subidx now points at the first segment that was NOT merged.
    return subidx - 1
|
27 |
+
|
28 |
+
|
29 |
+
# Input: path1, path2
|
30 |
+
# Output: aligned array of SRTsegment corresponding to path1 path2
|
31 |
+
# Note: Modify comment with .source_text to get output array with string only
|
32 |
def alignment(pred_path, gt_path):
|
33 |
pred = SrtScript.parse_from_srt_file(pred_path).segments
|
34 |
gt = SrtScript.parse_from_srt_file(gt_path).segments
|
35 |
pred_arr, gt_arr = [], []
|
36 |
+
idx_p, idx_t = 0, 0 # idx_p: current index of pred segment, idx_t for ground truth
|
37 |
|
38 |
while idx_p < len(pred) or idx_t < len(gt):
|
39 |
+
# Check if one srt file runs out while reading
|
40 |
ps = pred[idx_p] if idx_p < len(pred) else None
|
41 |
gs = gt[idx_t] if idx_t < len(gt) else None
|
42 |
+
|
43 |
if not ps:
|
44 |
+
# If ps runs out, align gs segment with filler one by one
|
45 |
+
gt_arr.append(gs)#.source_text
|
46 |
pred_arr.append('')
|
47 |
idx_t += 1
|
48 |
continue
|
49 |
|
50 |
if not gs:
|
51 |
+
# If gs runs out, align ps segment with filler one by one
|
52 |
+
pred_arr.append(ps)#.source_text
|
53 |
gt_arr.append('')
|
54 |
idx_p += 1
|
55 |
continue
|
56 |
|
57 |
ps_dur = ps.end - ps.start
|
58 |
gs_dur = gs.end - gs.start
|
59 |
+
|
60 |
+
# Check for duration to decide anchor and sub
|
61 |
if ps_dur <= gs_dur:
|
62 |
+
# Detect segment with no overlap
|
63 |
if ps.end < gs.start:
|
64 |
+
pred_arr.append(ps)#.source_text
|
65 |
+
gt_arr.append('') # append filler
|
66 |
+
idx_t -= 1 # reset ground truth index
|
67 |
else:
|
68 |
+
gt_arr.append(gs)#.source_text
|
69 |
if gs.end >= ps.start:
|
70 |
+
pred_arr.append(ps)#.source_text
|
71 |
idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
|
72 |
+
else: # filler pairing
|
73 |
pred_arr.append('')
|
74 |
idx_p -= 1
|
75 |
else:
|
76 |
+
# same overlap checking procedure
|
77 |
if gs.end < ps.start:
|
78 |
+
gt_arr.append(gs)#.source_text
|
79 |
+
pred_arr.append('') # filler
|
80 |
+
idx_p -= 1 # reset
|
81 |
else:
|
82 |
+
pred_arr.append(ps)#.source_text
|
83 |
if ps.end >= gs.start:
|
84 |
+
gt_arr.append(gs)#.source_text
|
85 |
idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
|
86 |
+
else: # filler pairing
|
87 |
gt_arr.append('')
|
88 |
idx_t -= 1
|
89 |
|