Spaces:
Sleeping
Sleeping
File size: 1,913 Bytes
3b31931 67916b1 3b31931 67916b1 3b31931 86d0ab8 3b31931 67916b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from myrpunct import RestorePuncts
import gradio as gr
import re
def predict(input_text):
    """Punctuate ``input_text`` and keep its original line layout.

    The text is passed to ``RestorePuncts().punctuate``. The punctuated words
    are then laid out again on the same lines as the input, word for word,
    because each input line (e.g. one line of an SRT transcript) corresponds
    to a frame of the video and the number of lines must not change.

    Args:
        input_text: The raw transcript; lines are separated by ``"\\n"``.

    Returns:
        The punctuated text with one output line per non-blank input line.
        If the punctuated text does not contain the same number of words as
        the input, the words cannot be aligned and a single error-message
        string with both word counts is returned instead. An empty or
        whitespace-only input yields an empty string.
    """
    # Nothing to punctuate: skip the model entirely.
    if not input_text or not input_text.strip():
        return ""

    # NOTE(review): a new RestorePuncts is built on every request; presumably
    # this reloads the model each time -- confirm before caching it.
    rpunct = RestorePuncts()
    output_text = rpunct.punctuate(input_text)
    print("Punctuation finished...")

    # Restore the carriage returns.
    # Record how many words each non-blank input line holds. Splitting on any
    # whitespace (instead of a single space) avoids empty tokens when the
    # transcript contains repeated spaces or trailing "\r" characters.
    words_per_line = [
        len(words)
        for words in (line.split() for line in input_text.split("\n"))
        if words
    ]
    punctuated_words = output_text.split()

    # The mapping below is purely positional, so it is only valid when the
    # punctuation step neither added nor removed words.
    source_word_count = sum(words_per_line)
    if source_word_count != len(punctuated_words):
        return (
            "AssertError: The length of the transcript and the punctuated "
            "file should be the same: "
            f"{source_word_count}, {len(punctuated_words)}"
        )

    # Deal the punctuated words back out, line by line. Working with indices
    # (rather than an in-band marker character) keeps any character of the
    # text, including '#', intact.
    restored_lines = []
    start = 0
    for count in words_per_line:
        restored_lines.append(" ".join(punctuated_words[start:start + count]))
        start += count
    return "\n".join(restored_lines)
if __name__ == "__main__":
    # Static texts shown on the web page.
    page_title = "Rpunct App"
    page_description = """
<b>Description</b>: <br>
Model restores punctuation and case i.e. of the following punctuations -- [! ? . , - : ; ' ] and also the upper-casing of words. <br>
"""
    sample_inputs = ["my name is clara and i live in berkeley california"]

    # One text box in, one text box out, wired to predict(); flagging is off.
    interface_options = dict(
        fn=predict,
        inputs=["text"],
        outputs=["text"],
        title=page_title,
        description=page_description,
        examples=sample_inputs,
        allow_flagging="never",
    )
    interface = gr.Interface(**interface_options)
    interface.launch()
|