"""Gradio demo that restores punctuation and casing in a transcript.

The punctuated text is laid out on the same lines as the input, because
each line of the input transcript (an .srt file) corresponds to a frame
of the video.
"""
import re

import gradio as gr
from myrpunct import RestorePuncts

# A line break together with any whitespace around it. Splitting on this
# treats runs of blank lines as a single break and trims each line.
_LINE_BREAK = re.compile(r"\s*\n\s*")


def _line_word_counts(text):
    """Return the number of whitespace-separated words on each line of *text*.

    Leading/trailing whitespace is ignored and blank lines are skipped.
    An empty or whitespace-only *text* yields an empty list.
    """
    stripped = text.strip()
    if not stripped:
        return []
    return [len(line.split()) for line in _LINE_BREAK.split(stripped)]


def _rebreak(words, counts):
    """Join *words* into lines holding ``counts[i]`` words each.

    Words inside a line are separated by one space and lines by ``"\\n"``.
    The caller must ensure ``sum(counts) == len(words)``.
    """
    lines = []
    start = 0
    for count in counts:
        lines.append(" ".join(words[start:start + count]))
        start += count
    return "\n".join(lines)


def predict(input_text):
    """Punctuate *input_text* and restore its original line breaks.

    Args:
        input_text: Transcript text; may span several lines.

    Returns:
        The punctuated text with the same word-per-line layout as the
        input. If the punctuated text does not contain the same number of
        words as the input, the layout cannot be restored and a single
        string describing the mismatch (with both word counts) is returned
        instead. Empty or whitespace-only input returns ``""``.
    """
    if not input_text or not input_text.strip():
        # Nothing to punctuate; skip the model call entirely.
        return ""

    # NOTE(review): a new RestorePuncts is constructed on every call. If
    # construction is expensive, consider reusing one instance -- confirm
    # first that an instance is safe to share between calls.
    rpunct = RestorePuncts()
    output_text = rpunct.punctuate(input_text)
    print("Punctuation finished...")

    # Goal: restore the break points, i.e. the same number of lines as the
    # srt file. Words are regrouped by position rather than by a marker
    # character, so text that already contains '#' is left intact.
    counts = _line_word_counts(input_text)
    punctuated_words = output_text.split()

    expected = sum(counts)
    actual = len(punctuated_words)
    if expected != actual:
        # Word positions no longer line up, so the breaks cannot be mapped.
        return (
            "AssertError: The length of the transcript and the punctuated "
            f"file should be the same: {expected} {actual}"
        )

    return _rebreak(punctuated_words, counts)


if __name__ == "__main__":
    title = "Rpunct App"
    description = """ Description:
Model restores punctuation and case i.e. of the following punctuations -- [! ? . , - : ; ' ] and also the upper-casing of words.
"""
    examples = ["my name is clara and i live in berkeley california"]

    interface = gr.Interface(
        fn=predict,
        inputs=["text"],
        outputs=["text"],
        title=title,
        description=description,
        examples=examples,
        allow_flagging="never",
    )
    interface.launch()