srijaydeshpande committed on
Commit
f19eb92
·
verified ·
1 Parent(s): cec9932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -78,16 +78,7 @@ def txt_to_html(text):
78
  html_content += "</body></html>"
79
  return html_content
80
 
81
- @spaces.GPU(duration=80)
82
- def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
83
-
84
- llm = Llama(
85
- model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
86
- flash_attn=True,
87
- n_gpu_layers=81,
88
- n_batch=1024,
89
- n_ctx=8192,
90
- )
91
 
92
  #### Remove Dates ###
93
  prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
@@ -185,8 +176,16 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
185
 
186
  return output
187
 
 
188
  def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
189
  files=[files]
 
 
 
 
 
 
 
190
  for file in files:
191
  if not file:
192
  return 'Please provide a valid PDF'
@@ -197,7 +196,7 @@ def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
197
  anonymized_text = ''
198
  for page_id in page2content:
199
  pdftext = page2content[page_id]
200
- anonymized_text += deidentify_doc(pdftext, maxtokens, temperature, top_probability)
201
  anonymized_text += '\n\n\n'
202
  return anonymized_text
203
 
 
78
  html_content += "</body></html>"
79
  return html_content
80
 
81
+ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
 
 
 
 
 
 
 
 
 
82
 
83
  #### Remove Dates ###
84
  prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
 
176
 
177
  return output
178
 
179
+ @spaces.GPU(duration=80)
180
  def pdf_to_text(files, maxtokens=2048, temperature=0, top_probability=0.95):
181
  files=[files]
182
+ llm = Llama(
183
+ model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
184
+ flash_attn=True,
185
+ n_gpu_layers=81,
186
+ n_batch=1024,
187
+ n_ctx=8192,
188
+ )
189
  for file in files:
190
  if not file:
191
  return 'Please provide a valid PDF'
 
196
  anonymized_text = ''
197
  for page_id in page2content:
198
  pdftext = page2content[page_id]
199
+ anonymized_text += deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability)
200
  anonymized_text += '\n\n\n'
201
  return anonymized_text
202