srijaydeshpande committed on
Commit
35e2666
·
verified ·
1 Parent(s): 1740a0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -68,9 +68,9 @@ def txt_to_html(text):
68
  html_content += "</body></html>"
69
  return html_content
70
 
71
- def deidentify_doc(pdftext="", maxtokens=600, temperature=1.2, top_probability=0.95):
72
 
73
- prompt = "Please anonymize the following clinical note. Replace all the following information with the term '[redacted]': Redact any strings that might be a name or initials, patients’ names, doctors’ names, the names Dr., redact any medical staff names, redact any strings that might be a location or address, such as '3970 Longview Drive', redact any strings that look like 'age 37', redact any dates and registration numbers, redact professions such as 'manager', redact any contact information."
74
 
75
  print('Max Tokens is ',maxtokens)
76
  print('Temperature is ',temperature)
@@ -118,12 +118,12 @@ def deidentify_doc(pdftext="", maxtokens=600, temperature=1.2, top_probability=0
118
 
119
  return output
120
 
121
- def pdf_to_text(file, maxtokens=600, temperature=1.2, top_probability=0.95):
122
  pdftext=""
123
  if(file):
124
  page2content = process_document(file, page_ids=[0])
125
  pdftext = page2content[1]
126
- display_text = deidentify_doc(pdftext, maxtokens, temperature, top_probability)
127
  html = txt_to_html(display_text)
128
  with open('out.html', "w", encoding="utf-8") as file:
129
  file.write(html)
@@ -149,7 +149,7 @@ prob_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Max Probability
149
  max_tokens = gr.Number(value=600, label="Max Tokens")
150
  iface = gr.Interface(
151
  fn = pdf_to_text,
152
- inputs = ['file', max_tokens, temp_slider, prob_slider],
153
  outputs="html",
154
  title='COBIx Endoscopy Report De-Identification',
155
  description="This application assists to remove personal information from the uploaded clinical report",
 
68
  html_content += "</body></html>"
69
  return html_content
70
 
71
+ def deidentify_doc(pdftext="", prompt="", maxtokens=600, temperature=1.2, top_probability=0.95):
72
 
73
+ # prompt = "Please anonymize the following clinical note. Replace all the following information with the term '[redacted]': Redact any strings that might be a name or initials, patients’ names, doctors’ names, the names Dr., redact any medical staff names, redact any strings that might be a location or address, such as '3970 Longview Drive', redact any strings that look like 'age 37', redact any dates and registration numbers, redact professions such as 'manager', redact any contact information."
74
 
75
  print('Max Tokens is ',maxtokens)
76
  print('Temperature is ',temperature)
 
118
 
119
  return output
120
 
121
+ def pdf_to_text(file, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
122
  pdftext=""
123
  if(file):
124
  page2content = process_document(file, page_ids=[0])
125
  pdftext = page2content[1]
126
+ display_text = deidentify_doc(pdftext, prompt, maxtokens, temperature, top_probability)
127
  html = txt_to_html(display_text)
128
  with open('out.html', "w", encoding="utf-8") as file:
129
  file.write(html)
 
149
  max_tokens = gr.Number(value=600, label="Max Tokens")
150
  iface = gr.Interface(
151
  fn = pdf_to_text,
152
+ inputs = ['file', "textbox", max_tokens, temp_slider, prob_slider],
153
  outputs="html",
154
  title='COBIx Endoscopy Report De-Identification',
155
  description="This application assists to remove personal information from the uploaded clinical report",