srijaydeshpande committed on
Commit
123786b
·
verified ·
1 Parent(s): 7b60721

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -84
app.py CHANGED
@@ -88,6 +88,7 @@ def txt_to_html(text):
88
  def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
89
 
90
  prompt = "In the following text, perform the following actions: 1. Replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' 2. Replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address, such as 'Phanton Medical Centre, Birmingham, CV36HE' with term [address]. It is important that all addresses are completely replaced with [address]. 3. Replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists. 4. Replace the nhs number and the case note number with term [ID]. Replace Hospital number with [ID]. 4. Replace age of person with [age]. It is important that all age numbers are completely replaced with [age]."
 
91
  output = llm.create_chat_completion(
92
  messages=[
93
  {"from": "user", "value": prompt + ' Text: ' + pdftext},
@@ -101,92 +102,20 @@ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
101
  find_index = output.find(' '.join(pdftext.split()[:3]))
102
  if find_index != -1:
103
  output = output[find_index:].strip()
104
-
105
- # #### Remove Dates ###
106
- # prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
107
- # output = llm.create_chat_completion(
108
- # messages=[
109
- # {"role": "assistant", "content": prompt},
110
- # {
111
- # "role": "user",
112
- # "content": pdftext
113
- # }
114
- # ],
115
- # max_tokens=maxtokens,
116
- # temperature=temperature
117
- # )
118
- # output = output['choices'][0]['message']['content']
119
-
120
- # # Remove starting header string in output
121
- # find_index = output.find(' '.join(pdftext.split()[:3]))
122
- # if find_index != -1:
123
- # output = output[find_index:].strip()
124
-
125
-
126
-
127
- # # #### Remove Locations and Addresses ###
128
- # prompt = "In the following text replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address, such as 'Phanton Medical Centre, Birmingham, CV36HE' with term [address]. It is important that all addresses are completely replaced with [address]."
129
- # output = llm.create_chat_completion(
130
- # messages=[
131
- # {"role": "assistant", "content": prompt},
132
- # {
133
- # "role": "user",
134
- # "content": output
135
- # }
136
- # ],
137
- # max_tokens=maxtokens,
138
- # temperature=temperature
139
- # )
140
- # output = output['choices'][0]['message']['content']
141
 
 
 
 
 
 
 
 
 
142
 
143
- # # Remove starting header string in output
144
- # find_index = output.find(' '.join(pdftext.split()[:3]))
145
- # if find_index != -1:
146
- # output = output[find_index:].strip()
147
-
148
-
149
-
150
- # #### Remove Names ###
151
- # prompt = "In the following text replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists."
152
- # output = llm.create_chat_completion(
153
- # messages=[
154
- # {"role": "assistant", "content": prompt},
155
- # {
156
- # "role": "user",
157
- # "content": output
158
- # }
159
- # ],
160
- # max_tokens=maxtokens,
161
- # temperature=temperature
162
- # )
163
- # output = output['choices'][0]['message']['content']
164
-
165
- # # Remove starting header string in output
166
- # find_index = output.find(' '.join(pdftext.split()[:3]))
167
- # if find_index != -1:
168
- # output = output[find_index:].strip()
169
-
170
- # ### Remove Registration Numbers ###
171
-
172
- # prompt = "In the following text replace the nhs number and the case note number with term [ID]. Replace Hospital number with [ID]."
173
- # output = llm.create_chat_completion(
174
- # messages=[
175
- # {"role": "assistant", "content": prompt},
176
- # {
177
- # "role": "user",
178
- # "content": output
179
- # }
180
- # ],
181
- # max_tokens=maxtokens,
182
- # temperature=temperature
183
- # )
184
- # output = output['choices'][0]['message']['content']
185
-
186
- # # Remove starting header string in output
187
- # find_index = output.find(' '.join(pdftext.split()[:3]))
188
- # if find_index != -1:
189
- # output = output[find_index:].strip()
190
 
191
  return output
192
 
 
88
  def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
89
 
90
  prompt = "In the following text, perform the following actions: 1. Replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' 2. Replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address, such as 'Phanton Medical Centre, Birmingham, CV36HE' with term [address]. It is important that all addresses are completely replaced with [address]. 3. Replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists. 4. Replace the nhs number and the case note number with term [ID]. Replace Hospital number with [ID]. 4. Replace age of person with [age]. It is important that all age numbers are completely replaced with [age]."
91
+
92
  output = llm.create_chat_completion(
93
  messages=[
94
  {"from": "user", "value": prompt + ' Text: ' + pdftext},
 
102
  find_index = output.find(' '.join(pdftext.split()[:3]))
103
  if find_index != -1:
104
  output = output[find_index:].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
+ output = llm.create_chat_completion(
107
+ messages=[
108
+ {"from": "user", "value": prompt + ' Text: ' + output},
109
+ ],
110
+ max_tokens=maxtokens,
111
+ temperature=temperature
112
+ )
113
+ output = output['choices'][0]['message']['content']
114
 
115
+ # Remove starting header string in output
116
+ find_index = output.find(' '.join(pdftext.split()[:3]))
117
+ if find_index != -1:
118
+ output = output[find_index:].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  return output
121