from myrpunct import RestorePuncts
import gradio as gr
import re
def predict(input_text):
    """Punctuate ``input_text`` and put its original line breaks back.

    The text is passed to ``RestorePuncts.punctuate``. The punctuated words
    are then regrouped so that the result has the same line layout as the
    input; this matters because each line of the transcript corresponds to
    a frame from the video.

    Args:
        input_text: Transcript text, possibly spanning several lines.

    Returns:
        The punctuated text with one output line per non-blank input line.
        If the punctuated text does not contain the same number of words as
        the input, the breaks cannot be mapped back and a message starting
        with ``"AssertError:"`` is returned instead.
    """
    # NOTE(review): a new RestorePuncts is built on every call; if its
    # construction is expensive, consider creating it once and reusing it.
    rpunct = RestorePuncts()
    output_text = rpunct.punctuate(input_text)
    print("Punctuation finished...")
    return _restore_line_breaks(input_text, output_text)


def _restore_line_breaks(original_text, punctuated_text):
    """Lay the words of ``punctuated_text`` out on the lines of ``original_text``."""
    # Record how many words each non-blank input line holds. Working with
    # counts avoids an in-band marker character, so text that already
    # contains that character cannot be mistaken for a line break.
    words_per_line = []
    for line in original_text.split("\n"):
        word_count = len(line.split())
        if word_count:
            words_per_line.append(word_count)

    # split() without arguments ignores repeated or surrounding whitespace,
    # so stray double spaces do not create empty "words".
    punctuated_words = punctuated_text.split()
    expected_words = sum(words_per_line)
    if expected_words != len(punctuated_words):
        # Returned as a single string so it renders cleanly in a text output.
        return (
            "AssertError: The length of the transcript and the punctuated "
            "file should be the same: {} vs {}".format(
                expected_words, len(punctuated_words)
            )
        )

    # Hand the punctuated words back out, line by line, in the original order.
    restored_lines = []
    start = 0
    for word_count in words_per_line:
        restored_lines.append(" ".join(punctuated_words[start:start + word_count]))
        start += word_count
    return "\n".join(restored_lines)
if __name__ == "__main__":
    # Script entry point: collect the demo settings, wire `predict` into a
    # text-in / text-out Gradio interface, and start serving it.
    interface_settings = {
        "fn": predict,
        "inputs": ["text"],
        "outputs": ["text"],
        "title": "Rpunct App",
        # The literal below is user-facing text; its lines stay at column 0
        # so no extra whitespace ends up in the rendered description.
        "description": """
Description:
Model restores punctuation and case i.e. of the following punctuations -- [! ? . , - : ; ' ] and also the upper-casing of words.
""",
        "examples": ["my name is clara and i live in berkeley california"],
        "allow_flagging": "never",
    }
    gr.Interface(**interface_settings).launch()