Spaces:

srijaydeshpande
/

DeID

Sleeping

App Files Files Community

srijaydeshpande committed on Sep 27, 2024

Commit

123786b

verified ·

1 Parent(s): 7b60721

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -84

app.py CHANGED Viewed

@@ -88,6 +88,7 @@ def txt_to_html(text):
 def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
     prompt = "In the following text, perform the following actions: 1. Replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' 2. Replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address, such as 'Phanton Medical Centre, Birmingham, CV36HE' with term [address]. It is important that all addresses are completely replaced with [address]. 3. Replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists. 4. Replace the nhs number and the case note number with term [ID]. Replace Hospital number with [ID]. 4. Replace age of person with [age]. It is important that all age numbers are completely replaced with [age]."
     output = llm.create_chat_completion(
         messages=[
             {"from": "user", "value": prompt + ' Text: ' + pdftext},
@@ -101,92 +102,20 @@ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
     find_index = output.find(' '.join(pdftext.split()[:3]))
     if find_index != -1:
         output = output[find_index:].strip()
-    # #### Remove Dates ###
-    # prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
-    # output = llm.create_chat_completion(
-    #     messages=[
-    #         {"role": "assistant", "content": prompt},
-    #         {
-    #             "role": "user",
-    #             "content": pdftext
-    #         }
-    #     ],
-    #     max_tokens=maxtokens,
-    #     temperature=temperature
-    # )
-    # output = output['choices'][0]['message']['content']
-    # # Remove starting header string in output
-    # find_index = output.find(' '.join(pdftext.split()[:3]))
-    # if find_index != -1:
-    #     output = output[find_index:].strip()
-    # # #### Remove Locations and Addresses ###
-    # prompt = "In the following text replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address, such as 'Phanton Medical Centre, Birmingham, CV36HE' with term [address]. It is important that all addresses are completely replaced with [address]."
-    # output = llm.create_chat_completion(
-    #     messages=[
-    #         {"role": "assistant", "content": prompt},
-    #         {
-    #             "role": "user",
-    #             "content": output
-    #         }
-    #     ],
-    #     max_tokens=maxtokens,
-    #     temperature=temperature
-    # )
-    # output = output['choices'][0]['message']['content']
-    # # Remove starting header string in output
-    # find_index = output.find(' '.join(pdftext.split()[:3]))
-    # if find_index != -1:
-    #     output = output[find_index:].strip()
-    # #### Remove Names ###
-    # prompt = "In the following text replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists."
-    # output = llm.create_chat_completion(
-    #     messages=[
-    #         {"role": "assistant", "content": prompt},
-    #         {
-    #             "role": "user",
-    #             "content": output
-    #         }
-    #     ],
-    #     max_tokens=maxtokens,
-    #     temperature=temperature
-    # )
-    # output = output['choices'][0]['message']['content']
-    # # Remove starting header string in output
-    # find_index = output.find(' '.join(pdftext.split()[:3]))
-    # if find_index != -1:
-    #     output = output[find_index:].strip()
-    # ### Remove Registration Numbers ###
-    # prompt = "In the following text replace the nhs number and the case note number with term [ID]. Replace Hospital number with [ID]."
-    # output = llm.create_chat_completion(
-    #     messages=[
-    #         {"role": "assistant", "content": prompt},
-    #         {
-    #             "role": "user",
-    #             "content": output
-    #         }
-    #     ],
-    #     max_tokens=maxtokens,
-    #     temperature=temperature
-    # )
-    # output = output['choices'][0]['message']['content']
-    # # Remove starting header string in output
-    # find_index = output.find(' '.join(pdftext.split()[:3]))
-    # if find_index != -1:
-    #     output = output[find_index:].strip()
     return output

 def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
     prompt = "In the following text, perform the following actions: 1. Replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' 2. Replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address, such as 'Phanton Medical Centre, Birmingham, CV36HE' with term [address]. It is important that all addresses are completely replaced with [address]. 3. Replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists. 4. Replace the nhs number and the case note number with term [ID]. Replace Hospital number with [ID]. 4. Replace age of person with [age]. It is important that all age numbers are completely replaced with [age]."
     output = llm.create_chat_completion(
         messages=[
             {"from": "user", "value": prompt + ' Text: ' + pdftext},
     find_index = output.find(' '.join(pdftext.split()[:3]))
     if find_index != -1:
         output = output[find_index:].strip()
+    output = llm.create_chat_completion(
+        messages=[
+            {"from": "user", "value": prompt + ' Text: ' + output},
+        ],
+        max_tokens=maxtokens,
+        temperature=temperature
+    )
+    output = output['choices'][0]['message']['content']
+    # Remove starting header string in output
+    find_index = output.find(' '.join(pdftext.split()[:3]))
+    if find_index != -1:
+        output = output[find_index:].strip()
     return output