Spaces:
Sleeping
Sleeping
srijaydeshpande
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -93,7 +93,7 @@ def deidentify_doc(pdftext="", prompt="", maxtokens=600, temperature=1.2, top_pr
|
|
93 |
output = replace_words_with_asterisk(pdfcontent, wordstoremove.split(','))
|
94 |
return output
|
95 |
|
96 |
-
iterations=
|
97 |
output = pdftext
|
98 |
for iter in range(0,iterations):
|
99 |
output = get_output(output)
|
@@ -108,16 +108,18 @@ def mkdir(dir):
|
|
108 |
@spaces.GPU(duration=120)
|
109 |
def pdf_to_text(files, output_folder, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
|
110 |
output_folder = output_folder.replace('\\', '/')
|
|
|
111 |
for file in files:
|
112 |
-
file_name = os.path.basename(file)
|
113 |
-
file_name_splt = file_name.split('.')
|
114 |
-
print('File name is ', file_name)
|
115 |
-
print('output folder is ', output_folder)
|
116 |
-
if (len(file_name_splt) > 1 and file_name_splt[1] == 'pdf'):
|
117 |
-
page2content = process_document(file, page_ids=[0])
|
118 |
-
pdftext = page2content[1]
|
119 |
-
|
120 |
-
|
|
|
121 |
return anonymized_text
|
122 |
|
123 |
|
@@ -134,7 +136,8 @@ output_text = gr.Textbox()
|
|
134 |
output_path_component = gr.File(label="Select Output Path")
|
135 |
iface = gr.Interface(
|
136 |
fn=pdf_to_text,
|
137 |
-
inputs=['files', input_folder_text, "textbox", max_tokens, temp_slider, prob_slider],
|
|
|
138 |
outputs=output_text,
|
139 |
title='COBIx Endoscopy Report De-Identification',
|
140 |
description="This application assists to remove personal information from the uploaded clinical report",
|
|
|
93 |
output = replace_words_with_asterisk(pdfcontent, wordstoremove.split(','))
|
94 |
return output
|
95 |
|
96 |
+
iterations=1
|
97 |
output = pdftext
|
98 |
for iter in range(0,iterations):
|
99 |
output = get_output(output)
|
|
|
108 |
@spaces.GPU(duration=120)
|
109 |
def pdf_to_text(files, output_folder, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
|
110 |
output_folder = output_folder.replace('\\', '/')
|
111 |
+
files=[files]#remove later
|
112 |
for file in files:
|
113 |
+
# file_name = os.path.basename(file)
|
114 |
+
# file_name_splt = file_name.split('.')
|
115 |
+
# print('File name is ', file_name)
|
116 |
+
# print('output folder is ', output_folder)
|
117 |
+
# if (len(file_name_splt) > 1 and file_name_splt[1] == 'pdf'):
|
118 |
+
# page2content = process_document(file, page_ids=[0])
|
119 |
+
# pdftext = page2content[1]
|
120 |
+
pdftext = file # remove later
|
121 |
+
if (pdftext): #shift this if block to right later
|
122 |
+
anonymized_text = deidentify_doc(pdftext, prompt, maxtokens, temperature, top_probability)
|
123 |
return anonymized_text
|
124 |
|
125 |
|
|
|
136 |
output_path_component = gr.File(label="Select Output Path")
|
137 |
iface = gr.Interface(
|
138 |
fn=pdf_to_text,
|
139 |
+
# inputs=['files', input_folder_text, "textbox", max_tokens, temp_slider, prob_slider],
|
140 |
+
inputs=["textbox", input_folder_text, "textbox", max_tokens, temp_slider, prob_slider],
|
141 |
outputs=output_text,
|
142 |
title='COBIx Endoscopy Report De-Identification',
|
143 |
description="This application assists to remove personal information from the uploaded clinical report",
|