srijaydeshpande committed on
Commit
50dfc74
·
verified ·
1 Parent(s): 37fb2ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -93,7 +93,7 @@ def deidentify_doc(pdftext="", prompt="", maxtokens=600, temperature=1.2, top_pr
93
  output = replace_words_with_asterisk(pdfcontent, wordstoremove.split(','))
94
  return output
95
 
96
- iterations=2
97
  output = pdftext
98
  for iter in range(0,iterations):
99
  output = get_output(output)
@@ -108,16 +108,18 @@ def mkdir(dir):
108
  @spaces.GPU(duration=120)
109
  def pdf_to_text(files, output_folder, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
110
  output_folder = output_folder.replace('\\', '/')
 
111
  for file in files:
112
- file_name = os.path.basename(file)
113
- file_name_splt = file_name.split('.')
114
- print('File name is ', file_name)
115
- print('output folder is ', output_folder)
116
- if (len(file_name_splt) > 1 and file_name_splt[1] == 'pdf'):
117
- page2content = process_document(file, page_ids=[0])
118
- pdftext = page2content[1]
119
- if (pdftext):
120
- anonymized_text = deidentify_doc(pdftext, prompt, maxtokens, temperature, top_probability)
 
121
  return anonymized_text
122
 
123
 
@@ -134,7 +136,8 @@ output_text = gr.Textbox()
134
  output_path_component = gr.File(label="Select Output Path")
135
  iface = gr.Interface(
136
  fn=pdf_to_text,
137
- inputs=['files', input_folder_text, "textbox", max_tokens, temp_slider, prob_slider],
 
138
  outputs=output_text,
139
  title='COBIx Endoscopy Report De-Identification',
140
  description="This application assists to remove personal information from the uploaded clinical report",
 
93
  output = replace_words_with_asterisk(pdfcontent, wordstoremove.split(','))
94
  return output
95
 
96
+ iterations=1
97
  output = pdftext
98
  for iter in range(0,iterations):
99
  output = get_output(output)
 
108
  @spaces.GPU(duration=120)
109
  def pdf_to_text(files, output_folder, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
110
  output_folder = output_folder.replace('\\', '/')
111
+ files=[files]#remove later
112
  for file in files:
113
+ # file_name = os.path.basename(file)
114
+ # file_name_splt = file_name.split('.')
115
+ # print('File name is ', file_name)
116
+ # print('output folder is ', output_folder)
117
+ # if (len(file_name_splt) > 1 and file_name_splt[1] == 'pdf'):
118
+ # page2content = process_document(file, page_ids=[0])
119
+ # pdftext = page2content[1]
120
+ pdftext = file # remove later
121
+ if (pdftext): #shift this if block to right later
122
+ anonymized_text = deidentify_doc(pdftext, prompt, maxtokens, temperature, top_probability)
123
  return anonymized_text
124
 
125
 
 
136
  output_path_component = gr.File(label="Select Output Path")
137
  iface = gr.Interface(
138
  fn=pdf_to_text,
139
+ # inputs=['files', input_folder_text, "textbox", max_tokens, temp_slider, prob_slider],
140
+ inputs=["textbox", input_folder_text, "textbox", max_tokens, temp_slider, prob_slider],
141
  outputs=output_text,
142
  title='COBIx Endoscopy Report De-Identification',
143
  description="This application assists to remove personal information from the uploaded clinical report",