Spaces:
Running
on
Zero
Running
on
Zero
srijaydeshpande
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -68,9 +68,9 @@ def txt_to_html(text):
|
|
68 |
html_content += "</body></html>"
|
69 |
return html_content
|
70 |
|
71 |
-
def deidentify_doc(pdftext="", maxtokens=600, temperature=1.2, top_probability=0.95):
|
72 |
|
73 |
-
prompt = "Please anonymize the following clinical note. Replace all the following information with the term '[redacted]': Redact any strings that might be a name or initials, patients’ names, doctors’ names, the names Dr., redact any medical staff names, redact any strings that might be a location or address, such as '3970 Longview Drive', redact any strings that look like 'age 37', redact any dates and registration numbers, redact professions such as 'manager', redact any contact information."
|
74 |
|
75 |
print('Max Tokens is ',maxtokens)
|
76 |
print('Temperature is ',temperature)
|
@@ -118,12 +118,12 @@ def deidentify_doc(pdftext="", maxtokens=600, temperature=1.2, top_probability=0
|
|
118 |
|
119 |
return output
|
120 |
|
121 |
-
def pdf_to_text(file, maxtokens=600, temperature=1.2, top_probability=0.95):
|
122 |
pdftext=""
|
123 |
if(file):
|
124 |
page2content = process_document(file, page_ids=[0])
|
125 |
pdftext = page2content[1]
|
126 |
-
display_text = deidentify_doc(pdftext, maxtokens, temperature, top_probability)
|
127 |
html = txt_to_html(display_text)
|
128 |
with open('out.html', "w", encoding="utf-8") as file:
|
129 |
file.write(html)
|
@@ -149,7 +149,7 @@ prob_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Max Probability
|
|
149 |
max_tokens = gr.Number(value=600, label="Max Tokens")
|
150 |
iface = gr.Interface(
|
151 |
fn = pdf_to_text,
|
152 |
-
inputs = ['file', max_tokens, temp_slider, prob_slider],
|
153 |
outputs="html",
|
154 |
title='COBIx Endoscopy Report De-Identification',
|
155 |
description="This application assists to remove personal information from the uploaded clinical report",
|
|
|
68 |
html_content += "</body></html>"
|
69 |
return html_content
|
70 |
|
71 |
+
def deidentify_doc(pdftext="", prompt="", maxtokens=600, temperature=1.2, top_probability=0.95):
|
72 |
|
73 |
+
# prompt = "Please anonymize the following clinical note. Replace all the following information with the term '[redacted]': Redact any strings that might be a name or initials, patients’ names, doctors’ names, the names Dr., redact any medical staff names, redact any strings that might be a location or address, such as '3970 Longview Drive', redact any strings that look like 'age 37', redact any dates and registration numbers, redact professions such as 'manager', redact any contact information."
|
74 |
|
75 |
print('Max Tokens is ',maxtokens)
|
76 |
print('Temperature is ',temperature)
|
|
|
118 |
|
119 |
return output
|
120 |
|
121 |
+
def pdf_to_text(file, prompt, maxtokens=600, temperature=1.2, top_probability=0.95):
|
122 |
pdftext=""
|
123 |
if(file):
|
124 |
page2content = process_document(file, page_ids=[0])
|
125 |
pdftext = page2content[1]
|
126 |
+
display_text = deidentify_doc(pdftext, prompt, maxtokens, temperature, top_probability)
|
127 |
html = txt_to_html(display_text)
|
128 |
with open('out.html', "w", encoding="utf-8") as file:
|
129 |
file.write(html)
|
|
|
149 |
max_tokens = gr.Number(value=600, label="Max Tokens")
|
150 |
iface = gr.Interface(
|
151 |
fn = pdf_to_text,
|
152 |
+
inputs = ['file', "textbox", max_tokens, temp_slider, prob_slider],
|
153 |
outputs="html",
|
154 |
title='COBIx Endoscopy Report De-Identification',
|
155 |
description="This application assists to remove personal information from the uploaded clinical report",
|