Spaces:
Running
on
Zero
Running
on
Zero
srijaydeshpande
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -79,36 +79,37 @@ def txt_to_html(text):
|
|
79 |
return html_content
|
80 |
|
81 |
def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
|
82 |
-
|
83 |
-
|
84 |
-
prompt = "In the following text replace
|
85 |
output = llm.create_chat_completion(
|
86 |
messages=[
|
87 |
{"role": "assistant", "content": prompt},
|
88 |
{
|
89 |
"role": "user",
|
90 |
-
"content":
|
91 |
}
|
92 |
],
|
93 |
max_tokens=maxtokens,
|
94 |
temperature=temperature
|
95 |
)
|
96 |
output = output['choices'][0]['message']['content']
|
97 |
-
|
98 |
|
99 |
# Remove starting header string in output
|
100 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
101 |
if find_index != -1:
|
102 |
output = output[find_index:].strip()
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
106 |
output = llm.create_chat_completion(
|
107 |
messages=[
|
108 |
{"role": "assistant", "content": prompt},
|
109 |
{
|
110 |
"role": "user",
|
111 |
-
"content":
|
112 |
}
|
113 |
],
|
114 |
max_tokens=maxtokens,
|
@@ -123,6 +124,7 @@ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
|
|
123 |
output = output[find_index:].strip()
|
124 |
|
125 |
|
|
|
126 |
#### Remove Names ###
|
127 |
prompt = "In the following text replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists."
|
128 |
output = llm.create_chat_completion(
|
@@ -143,8 +145,7 @@ def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
|
|
143 |
if find_index != -1:
|
144 |
output = output[find_index:].strip()
|
145 |
|
146 |
-
|
147 |
-
# print(output)
|
148 |
|
149 |
|
150 |
### Remove Registration Numbers ###
|
|
|
79 |
return html_content
|
80 |
|
81 |
def deidentify_doc(llm, pdftext, maxtokens, temperature, top_probability):
|
82 |
+
|
83 |
+
#### Remove Dates ###
|
84 |
+
prompt = "In the following text replace only the calendar dates with term [date]. Example: if input is 'Date of birth: 15/5/1959 calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)' output should be 'Date of birth: [date] calculated BP (Systolic 158.00 mm, Diastolic 124.95 mm)'"
|
85 |
output = llm.create_chat_completion(
|
86 |
messages=[
|
87 |
{"role": "assistant", "content": prompt},
|
88 |
{
|
89 |
"role": "user",
|
90 |
+
"content": output
|
91 |
}
|
92 |
],
|
93 |
max_tokens=maxtokens,
|
94 |
temperature=temperature
|
95 |
)
|
96 |
output = output['choices'][0]['message']['content']
|
|
|
97 |
|
98 |
# Remove starting header string in output
|
99 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
100 |
if find_index != -1:
|
101 |
output = output[find_index:].strip()
|
102 |
+
|
103 |
+
|
104 |
+
|
105 |
+
# #### Remove Locations and Addresses ###
|
106 |
+
prompt = "In the following text replace location or address, such as '3970 Longview Drive, CV36HE' with term [address]. Replace complete GP address with term [address]. It is important that all addresses are fully replaced with [address]."
|
107 |
output = llm.create_chat_completion(
|
108 |
messages=[
|
109 |
{"role": "assistant", "content": prompt},
|
110 |
{
|
111 |
"role": "user",
|
112 |
+
"content": pdftext
|
113 |
}
|
114 |
],
|
115 |
max_tokens=maxtokens,
|
|
|
124 |
output = output[find_index:].strip()
|
125 |
|
126 |
|
127 |
+
|
128 |
#### Remove Names ###
|
129 |
prompt = "In the following text replace any person name with term [name]. It is important that all person names are replaced with term [name]. Remove any gender terms 'male' or 'female' if exists."
|
130 |
output = llm.create_chat_completion(
|
|
|
145 |
if find_index != -1:
|
146 |
output = output[find_index:].strip()
|
147 |
|
148 |
+
|
|
|
149 |
|
150 |
|
151 |
### Remove Registration Numbers ###
|