Spaces:

srijaydeshpande
/

DeID

Running on Zero

App Files Files Community

srijaydeshpande committed on Jun 28, 2024

Commit

e289e8d

verified ·

1 Parent(s): 11f2903

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -11

app.py CHANGED Viewed

@@ -79,36 +79,37 @@ def txt_to_html(text):
     return html_content
 def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
-    # #### Remove Locations and Addresses ###
-    prompt = "In the following text replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address with term [address]. It is important that all addresses are fully replaced with [address]."
     output = llm.create_chat_completion(
         messages=[
             {"role": "assistant", "content": prompt},
             {
                 "role": "user",
-                "content": pdftext
             }
         ],
         max_tokens=maxtokens,
         temperature=temperature
     )
     output = output['choices'][0]['message']['content']
     # Remove starting header string in output
     find_index = output.find(' '.join(pdftext.split()[:3]))
     if find_index != -1:
         output = output[find_index:].strip()
-    #### Remove Dates ###
-    prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
     output = llm.create_chat_completion(
         messages=[
             {"role": "assistant", "content": prompt},
             {
                 "role": "user",
-                "content": output
             }
         ],
         max_tokens=maxtokens,
@@ -123,6 +124,7 @@ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
         output = output[find_index:].strip()
     #### Remove Names ###
     prompt = "In the following text replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists."
     output = llm.create_chat_completion(
@@ -143,8 +145,7 @@ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
     if find_index != -1:
         output = output[find_index:].strip()
-    # print('---------------Remove Names-----------------------')
-    # print(output)
     ### Remove Registration Numbers ###

     return html_content
 def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
+    #### Remove Dates ###
+    prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
     output = llm.create_chat_completion(
         messages=[
             {"role": "assistant", "content": prompt},
             {
                 "role": "user",
+                "content": output
             }
         ],
         max_tokens=maxtokens,
         temperature=temperature
     )
     output = output['choices'][0]['message']['content']
     # Remove starting header string in output
     find_index = output.find(' '.join(pdftext.split()[:3]))
     if find_index != -1:
         output = output[find_index:].strip()
+    # #### Remove Locations and Addresses ###
+    prompt = "In the following text replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address with term [address]. It is important that all addresses are fully replaced with [address]."
     output = llm.create_chat_completion(
         messages=[
             {"role": "assistant", "content": prompt},
             {
                 "role": "user",
+                "content": pdftext
             }
         ],
         max_tokens=maxtokens,
         output = output[find_index:].strip()
     #### Remove Names ###
     prompt = "In the following text replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists."
     output = llm.create_chat_completion(
     if find_index != -1:
         output = output[find_index:].strip()
     ### Remove Registration Numbers ###