Spaces:
Sleeping
Sleeping
srijaydeshpande
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -78,16 +78,7 @@ def txt_to_html(text):
|
|
78 |
html_content += "</body></html>"
|
79 |
return html_content
|
80 |
|
81 |
-
|
82 |
-
def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
83 |
-
|
84 |
-
llm = Llama(
|
85 |
-
model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
|
86 |
-
flash_attn=True,
|
87 |
-
n_gpu_layers=81,
|
88 |
-
n_batch=1024,
|
89 |
-
n_ctx=8192,
|
90 |
-
)
|
91 |
|
92 |
#### Remove Dates ###
|
93 |
prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
|
@@ -185,8 +176,16 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
|
185 |
|
186 |
return output
|
187 |
|
|
|
188 |
def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
|
189 |
files=[files]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
for file in files:
|
191 |
if not file:
|
192 |
return 'Please provide a valid PDF'
|
@@ -197,7 +196,7 @@ def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
|
|
197 |
anonymized_text = ''
|
198 |
for page_id in page2content:
|
199 |
pdftext = page2content[page_id]
|
200 |
-
anonymized_text += deidentify_doc(pdftext, maxtokens, temperature, top_probability)
|
201 |
anonymized_text += '\n\n\n'
|
202 |
return anonymized_text
|
203 |
|
|
|
78 |
html_content += "</body></html>"
|
79 |
return html_content
|
80 |
|
81 |
+
def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
#### Remove Dates ###
|
84 |
prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
|
|
|
176 |
|
177 |
return output
|
178 |
|
179 |
+
@spaces.GPU(duration=80)
|
180 |
def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
|
181 |
files=[files]
|
182 |
+
llm = Llama(
|
183 |
+
model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
|
184 |
+
flash_attn=True,
|
185 |
+
n_gpu_layers=81,
|
186 |
+
n_batch=1024,
|
187 |
+
n_ctx=8192,
|
188 |
+
)
|
189 |
for file in files:
|
190 |
if not file:
|
191 |
return 'Please provide a valid PDF'
|
|
|
196 |
anonymized_text = ''
|
197 |
for page_id in page2content:
|
198 |
pdftext = page2content[page_id]
|
199 |
+
anonymized_text += deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability)
|
200 |
anonymized_text += '\n\n\n'
|
201 |
return anonymized_text
|
202 |
|