vijul.shah committed
Commit 8f8ef33
1 Parent(s): 4b41e60

Blink Detection Support Added, Predicted Diameter Post-Analysis Plots Added

Files changed (5):
  1. app.py +72 -5
  2. app_utils.py +138 -66
  3. image.py +0 -32
  4. registrations/models.py +2 -70
  5. video.py +0 -48
app.py CHANGED

@@ -5,9 +5,11 @@ import os.path as osp
 from PIL import Image
 from io import BytesIO
 import numpy as np
+import pandas as pd
 import streamlit as st
 from PIL import ImageOps
 from matplotlib import pyplot as plt
+import altair as alt

 root_path = osp.abspath(osp.join(__file__, osp.pardir))
 sys.path.append(root_path)
@@ -17,7 +19,7 @@ from app_utils import (
     extract_frames,
     is_image,
     is_video,
-    display_results,
+    convert_diameter,
     overlay_text_on_frame,
     process_frames,
     process_video,
@@ -36,6 +38,18 @@ LABEL_MAP = ["left_pupil", "right_pupil"]

 def main():
     st.set_page_config(page_title="Pupil Diameter Estimator", layout="wide")
+    st.markdown(
+        """
+        <style>
+        /* Remove the top margin/padding */
+        .block-container {
+            padding-top: 0rem;
+            padding-bottom: 1rem; /* Adjust this as needed */
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
     st.title("EyeDentify Playground")
     cols = st.columns((1, 1))
     cols[0].header("Input")
@@ -77,6 +91,8 @@
     )
     tv_model = st.sidebar.selectbox("Classification model", ["ResNet18", "ResNet50"], help="Supported Models")

+    blink_detection = st.sidebar.checkbox("Detect Blinks")
+
     if st.sidebar.button("Predict Diameter & Compute CAM"):
         if uploaded_file is None:
             st.sidebar.error("Please upload an image or video")
@@ -90,8 +106,7 @@
                 tv_model,
                 pupil_selection,
                 cam_method=CAM_METHODS[-1],
-                output_path=None,
-                codec=None,
+                blink_detection=blink_detection,
             )
             # for ff in face_frames:
             #     if ff["has_face"]:
@@ -115,11 +130,63 @@

         elif is_video(file_extension):
             output_video_path = f"{root_path}/tmp.webm"
-            process_video(
-                cols, video_frames, tv_model, pupil_selection, output_video_path, cam_method=CAM_METHODS[-1]
+            input_frames, output_frames, predicted_diameters, face_frames = process_video(
+                cols,
+                video_frames,
+                tv_model,
+                pupil_selection,
+                output_video_path,
+                cam_method=CAM_METHODS[-1],
+                blink_detection=blink_detection,
             )
             os.remove(video_path)

+            num_columns = len(predicted_diameters)
+
+            # Create a layout for the charts
+            cols = st.columns(num_columns)
+
+            colors = ["#2ca02c", "#d62728", "#1f77b4", "#ff7f0e"]  # Green, Red, Blue, Orange
+
+            # Iterate through categories and assign charts to columns
+            for i, (category, values) in enumerate(predicted_diameters.items()):
+                with cols[i]:  # Directly use the column index
+                    # st.subheader(category)  # Add a subheader for the category
+
+                    # Convert values to numeric, replacing non-numeric values with None
+                    values = [convert_diameter(value) for value in values]
+
+                    # Create a DataFrame from the values for Altair
+                    df = pd.DataFrame(values, columns=[category])
+                    df["Frame"] = range(1, len(values) + 1)  # Create a frame column starting from 1
+
+                    # Get the min and max values for y-axis limits, ignoring None
+                    min_value = min(filter(lambda x: x is not None, values), default=None)
+                    max_value = max(filter(lambda x: x is not None, values), default=None)
+
+                    # Create an Altair chart with y-axis limits
+                    chart = (
+                        alt.Chart(df)
+                        .mark_line(point=True, color=colors[i])
+                        .encode(
+                            x=alt.X("Frame:Q", title="Frame Number"),
+                            y=alt.Y(
+                                f"{category}:Q",
+                                title="Diameter",
+                                scale=alt.Scale(domain=[min_value, max_value]),
+                            ),
+                            tooltip=[
+                                alt.Tooltip("Frame:Q", title="Frame Number"),
+                                alt.Tooltip(f"{category}:Q", title="Diameter"),
+                            ],
+                        )
+                        .properties(title=f"{category} - Predicted Diameters")
+                        .configure_axis(grid=True)
+                    )
+
+                    # Display the Altair chart
+                    st.altair_chart(chart, use_container_width=True)
+

 if __name__ == "__main__":
     main()
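
Note (not part of the commit): the chart code above relies on process_video recording blinked frames as the string "blink" in predicted_diameters. convert_diameter maps those entries to None, pandas stores them as NaN, and the Altair line therefore shows a gap wherever the eye was closed. A minimal standalone sketch with hypothetical values:

    import pandas as pd
    import altair as alt
    from app_utils import convert_diameter

    raw = [3.41, 3.38, "blink", 3.52]            # per-frame predictions; "blink" marks a closed eye
    values = [convert_diameter(v) for v in raw]  # -> [3.41, 3.38, None, 3.52]

    df = pd.DataFrame(values, columns=["left_pupil"])
    df["Frame"] = range(1, len(values) + 1)      # frame numbers starting from 1

    # None becomes NaN in the DataFrame, so the line breaks at frame 3
    # instead of plotting a bogus zero diameter.
    chart = alt.Chart(df).mark_line(point=True).encode(x="Frame:Q", y="left_pupil:Q")
    chart.save("left_pupil_diameters.html")      # or st.altair_chart(chart) inside the app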
app_utils.py CHANGED

@@ -110,7 +110,7 @@ def overlay_text_on_frame(frame, text, position=(16, 20)):
     return cv2.putText(frame, text, position, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, cv2.LINE_AA)


-def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, output_path, codec):
+def get_configs(blink_detection=False):
     upscale = "-"
     upscale_method_or_model = "-"
     if upscale == "-":
@@ -123,14 +123,21 @@ def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, outp
     config_file = {
         "sr_configs": sr_configs,
         "feature_extraction_configs": {
-            "blink_detection": False,
+            "blink_detection": blink_detection,
             "upscale": upscale,
             "extraction_library": "mediapipe",
         },
     }
+
+    return config_file
+
+
+def setup(cols, pupil_selection, tv_model, output_path):
+
     left_pupil_model = None
+    left_pupil_cam_extractor = None
     right_pupil_model = None
+    right_pupil_cam_extractor = None
-    face_frames = []
     output_frames = {}
     input_frames = {}
     predicted_diameters = {}
@@ -163,21 +170,59 @@ def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, outp
         input_frames[eye_type] = []
         predicted_diameters[eye_type] = []

+    video_input_placeholders = {}
+    video_output_placeholders = {}
+    video_predictions_placeholders = {}
+
     if output_path:
         video_cols = cols[1].columns(len(input_frames.keys()))

-        video_input_placeholders = {}
         for i, eye_type in enumerate(list(input_frames.keys())):
             video_input_placeholders[eye_type] = video_cols[i].empty()

-        video_output_placeholders = {}
         for i, eye_type in enumerate(list(input_frames.keys())):
             video_output_placeholders[eye_type] = video_cols[i].empty()

-        video_predictions_placeholders = {}
         for i, eye_type in enumerate(list(input_frames.keys())):
             video_predictions_placeholders[eye_type] = video_cols[i].empty()

+    return (
+        selected_eyes,
+        input_frames,
+        output_frames,
+        predicted_diameters,
+        video_input_placeholders,
+        video_output_placeholders,
+        video_predictions_placeholders,
+        left_pupil_model,
+        left_pupil_cam_extractor,
+        right_pupil_model,
+        right_pupil_cam_extractor,
+    )
+
+
+def process_frames(
+    cols, input_imgs, tv_model, pupil_selection, cam_method, output_path=None, codec=None, blink_detection=False
+):
+
+    config_file = get_configs(blink_detection)
+
+    face_frames = []
+
+    (
+        selected_eyes,
+        input_frames,
+        output_frames,
+        predicted_diameters,
+        video_input_placeholders,
+        video_output_placeholders,
+        video_predictions_placeholders,
+        left_pupil_model,
+        left_pupil_cam_extractor,
+        right_pupil_model,
+        right_pupil_cam_extractor,
+    ) = setup(cols, pupil_selection, tv_model, output_path)
+
     ds_creation = EyeDentityDatasetCreation(
         feature_extraction_configs=config_file["feature_extraction_configs"],
         sr_configs=config_file["sr_configs"],
@@ -212,17 +257,16 @@ def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, outp

         if ds_results is not None and "eyes" in ds_results.keys():
             blinked = ds_results["eyes"]["blinked"]
-            if not blinked:
-                if "left_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["left_eye"] is not None:
-                    left_eye = ds_results["eyes"]["left_eye"]
-                    left_eye = to_pil_image(left_eye).convert("RGB")
-                    left_eye = preprocess_function(left_eye)
-                    left_eye = left_eye.unsqueeze(0)
-                if "right_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["right_eye"] is not None:
-                    right_eye = ds_results["eyes"]["right_eye"]
-                    right_eye = to_pil_image(right_eye).convert("RGB")
-                    right_eye = preprocess_function(right_eye)
-                    right_eye = right_eye.unsqueeze(0)
+            if "left_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["left_eye"] is not None:
+                left_eye = ds_results["eyes"]["left_eye"]
+                left_eye = to_pil_image(left_eye).convert("RGB")
+                left_eye = preprocess_function(left_eye)
+                left_eye = left_eye.unsqueeze(0)
+            if "right_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["right_eye"] is not None:
+                right_eye = ds_results["eyes"]["right_eye"]
+                right_eye = to_pil_image(right_eye).convert("RGB")
+                right_eye = preprocess_function(right_eye)
+                right_eye = right_eye.unsqueeze(0)
         else:
             input_img = preprocess_function(input_img)
             input_img = input_img.unsqueeze(0)
@@ -235,57 +279,67 @@ def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, outp
             right_eye = input_img

         for i, eye_type in enumerate(selected_eyes):
-            if left_eye is not None and eye_type == "left_eye":
-                if left_pupil_cam_extractor is None:
-                    if tv_model == "ResNet18":
-                        target_layer = left_pupil_model.resnet.layer4[-1].conv2
-                    elif tv_model == "ResNet50":
-                        target_layer = left_pupil_model.resnet.layer4[-1].conv3
-                    else:
-                        raise Exception(f"No target layer available for selected model: {tv_model}")
-                    left_pupil_cam_extractor = torchcam_methods.__dict__[cam_method](
-                        left_pupil_model,
-                        target_layer=target_layer,
-                        fc_layer=left_pupil_model.resnet.fc,
-                        input_shape=left_eye.shape,
-                    )
-                output = left_pupil_model(left_eye)
-                predicted_diameter = output[0].item()
-                act_maps = left_pupil_cam_extractor(0, output)
-                activation_map = act_maps[0] if len(act_maps) == 1 else left_pupil_cam_extractor.fuse_cams(act_maps)
-                input_image_pil = to_pil_image(left_eye.squeeze(0))
-            elif right_eye is not None and eye_type == "right_eye":
-                if right_pupil_cam_extractor is None:
-                    if tv_model == "ResNet18":
-                        target_layer = right_pupil_model.resnet.layer4[-1].conv2
-                    elif tv_model == "ResNet50":
-                        target_layer = right_pupil_model.resnet.layer4[-1].conv3
-                    else:
-                        raise Exception(f"No target layer available for selected model: {tv_model}")
-                    right_pupil_cam_extractor = torchcam_methods.__dict__[cam_method](
-                        right_pupil_model,
-                        target_layer=target_layer,
-                        fc_layer=right_pupil_model.resnet.fc,
-                        input_shape=right_eye.shape,
-                    )
-                output = right_pupil_model(right_eye)
-                predicted_diameter = output[0].item()
-                act_maps = right_pupil_cam_extractor(0, output)
-                activation_map = act_maps[0] if len(act_maps) == 1 else right_pupil_cam_extractor.fuse_cams(act_maps)
-                input_image_pil = to_pil_image(right_eye.squeeze(0))

             if blinked:
-                zeros_img = to_pil_image(np.zeros((256, 256, 3), dtype=np.uint8))
-                input_image_pil = zeros_img
-                result = zeros_img
-                predicted_diameter = 0
+
+                if left_eye is not None and eye_type == "left_eye":
+                    _, height, width = left_eye.squeeze(0).shape
+                    input_image_pil = to_pil_image(left_eye.squeeze(0))
+                elif right_eye is not None and eye_type == "right_eye":
+                    _, height, width = right_eye.squeeze(0).shape
+                    input_image_pil = to_pil_image(right_eye.squeeze(0))
+
+                input_img_np = np.array(input_image_pil)
+                zeros_img = to_pil_image(np.zeros((height, width, 3), dtype=np.uint8))
+                output_img_np = overlay_text_on_frame(np.array(zeros_img), "blink")
+                predicted_diameter = "blink"
             else:
+                if left_eye is not None and eye_type == "left_eye":
+                    if left_pupil_cam_extractor is None:
+                        if tv_model == "ResNet18":
+                            target_layer = left_pupil_model.resnet.layer4[-1].conv2
+                        elif tv_model == "ResNet50":
+                            target_layer = left_pupil_model.resnet.layer4[-1].conv3
+                        else:
+                            raise Exception(f"No target layer available for selected model: {tv_model}")
+                        left_pupil_cam_extractor = torchcam_methods.__dict__[cam_method](
+                            left_pupil_model,
+                            target_layer=target_layer,
+                            fc_layer=left_pupil_model.resnet.fc,
+                            input_shape=left_eye.shape,
+                        )
+                    output = left_pupil_model(left_eye)
+                    predicted_diameter = output[0].item()
+                    act_maps = left_pupil_cam_extractor(0, output)
+                    activation_map = act_maps[0] if len(act_maps) == 1 else left_pupil_cam_extractor.fuse_cams(act_maps)
+                    input_image_pil = to_pil_image(left_eye.squeeze(0))
+                elif right_eye is not None and eye_type == "right_eye":
+                    if right_pupil_cam_extractor is None:
+                        if tv_model == "ResNet18":
+                            target_layer = right_pupil_model.resnet.layer4[-1].conv2
+                        elif tv_model == "ResNet50":
+                            target_layer = right_pupil_model.resnet.layer4[-1].conv3
+                        else:
+                            raise Exception(f"No target layer available for selected model: {tv_model}")
+                        right_pupil_cam_extractor = torchcam_methods.__dict__[cam_method](
+                            right_pupil_model,
+                            target_layer=target_layer,
+                            fc_layer=right_pupil_model.resnet.fc,
+                            input_shape=right_eye.shape,
+                        )
+                    output = right_pupil_model(right_eye)
+                    predicted_diameter = output[0].item()
+                    act_maps = right_pupil_cam_extractor(0, output)
+                    activation_map = (
+                        act_maps[0] if len(act_maps) == 1 else right_pupil_cam_extractor.fuse_cams(act_maps)
+                    )
+                    input_image_pil = to_pil_image(right_eye.squeeze(0))
+
                 # Create CAM overlay
                 activation_map_pil = to_pil_image(activation_map, mode="F")
                 result = overlay_mask(input_image_pil, activation_map_pil, alpha=0.5)
-
-            input_img_np = np.array(input_image_pil)
-            output_img_np = np.array(result)
+                input_img_np = np.array(input_image_pil)
+                output_img_np = np.array(result)

             # Add frame and predicted diameter to lists
             input_frames[eye_type].append(input_img_np)
@@ -295,7 +349,10 @@ def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, outp
             if output_path:
                 height, width, _ = output_img_np.shape
                 frame = np.zeros((height, width, 3), dtype=np.uint8)
-                text = f"{predicted_diameter:.2f}"
+                if not isinstance(predicted_diameter, str):
+                    text = f"{predicted_diameter:.2f}"
+                else:
+                    text = predicted_diameter
                 frame = overlay_text_on_frame(frame, text)

                 video_input_placeholders[eye_type].image(input_img_np, use_column_width=True)
@@ -385,7 +442,10 @@ def show_pred_text_frames(output_frames, output_path, predicted_diameters, codec

        for diameter in predicted_diameters[eye_type]:
            frame = np.zeros((height, width, 3), dtype=np.uint8)
-            if not isinstance(diameter, str):
-            text = f"{diameter:.2f}"
+            if not isinstance(diameter, str):
+                text = f"{diameter:.2f}"
+            else:
+                text = diameter
            frame = overlay_text_on_frame(frame, text)
            out.write(frame)
        out.release()
@@ -398,7 +458,7 @@
        os.remove(output_path)


-def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method):
+def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method, blink_detection=False):

    resized_frames = []
    for i, frame in enumerate(video_frames):
@@ -408,4 +468,16 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
    file_format = output_path.split(".")[-1]
    codec, extension = get_codec_and_extension(file_format)

-    process_frames(cols, resized_frames, tv_model, pupil_selection, cam_method, output_path, codec)
+    input_frames, output_frames, predicted_diameters, face_frames = process_frames(
+        cols, resized_frames, tv_model, pupil_selection, cam_method, output_path, codec, blink_detection
+    )
+
+    return input_frames, output_frames, predicted_diameters, face_frames
+
+
+# Function to convert string values to float or None
+def convert_diameter(value):
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return None  # Return None if conversion fails
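
For reference (not in the diff): the isinstance branches added in process_frames and show_pred_text_frames both implement the same formatting rule, sketched here as a hypothetical helper:

    def format_diameter(predicted_diameter):
        # Numeric predictions are rendered with two decimals; the "blink"
        # sentinel string introduced by blink detection is drawn as-is.
        if not isinstance(predicted_diameter, str):
            return f"{predicted_diameter:.2f}"
        return predicted_diameter

    assert format_diameter(3.456) == "3.46"
    assert format_diameter("blink") == "blink"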
image.py DELETED

@@ -1,32 +0,0 @@
-import cv2
-import numpy as np
-
-# Load the original face image
-face_image = cv2.imread("path_to_face_image.jpg")
-
-# Suppose CAM_left and CAM_right are the CAM results for the eyes (each 32x64)
-CAM_left = cv2.imread("path_to_CAM_left.jpg")  # or generated by your model
-CAM_right = cv2.imread("path_to_CAM_right.jpg")  # or generated by your model
-
-# Example bounding boxes for the left and right eye
-left_eye_bbox = (x_left, y_left, width_left, height_left)
-right_eye_bbox = (x_right, y_right, width_right, height_right)
-
-# Resize CAM images if needed (they should be 32x64, but resize to match bbox size)
-CAM_left_resized = cv2.resize(CAM_left, (width_left, height_left))
-CAM_right_resized = cv2.resize(CAM_right, (width_right, height_right))
-
-# Create a copy of the face image to overlay the CAM results
-face_with_CAM = face_image.copy()
-
-# Overlay left eye CAM
-face_with_CAM[y_left : y_left + height_left, x_left : x_left + width_left] = CAM_left_resized
-
-# Overlay right eye CAM
-face_with_CAM[y_right : y_right + height_right, x_right : x_right + width_right] = CAM_right_resized
-
-# Save or display the result
-cv2.imwrite("face_with_CAM_overlay.jpg", face_with_CAM)
-cv2.imshow("Face with CAM Overlay", face_with_CAM)
-cv2.waitKey(0)
-cv2.destroyAllWindows()
registrations/models.py CHANGED

@@ -11,38 +11,6 @@ sys.path.append(root_path)
 # ============================= ResNets =============================


-# @MODEL_REGISTRY.register()
-# class ResNet18(nn.Module):
-#     def __init__(self, model_args):
-#         super(ResNet18, self).__init__()
-#         self.num_classes = model_args.get("num_classes", 1)
-#         self.resnet = models.resnet18(weights=None, num_classes=self.num_classes)
-
-#     def forward(self, x, masks=None):
-#         return self.resnet(x)
-
-
-# @MODEL_REGISTRY.register()
-# class ResNet18(nn.Module):
-#     def __init__(self, model_args):
-#         super(ResNet18, self).__init__()
-#         self.num_classes = model_args.get("num_classes", 1)
-#         self.resnet = models.resnet18(weights=None, num_classes=self.num_classes)
-
-#     def forward(self, x, masks=None):
-#         # Calculate the padding dynamically based on the input size
-#         height, width = x.shape[2], x.shape[3]
-#         pad_height = max(0, (224 - height) // 2)
-#         pad_width = max(0, (224 - width) // 2)
-
-#         # Apply padding
-#         x = F.pad(
-#             x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0
-#         )
-#         x = self.resnet(x)
-#         return x
-
-
 @MODEL_REGISTRY.register()
 class ResNet18(nn.Module):
     def __init__(self, model_args):
@@ -58,46 +26,12 @@ class ResNet18(nn.Module):
         pad_width = max(0, (224 - width) // 2)

         # Apply padding
-        x = F.pad(
-            x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0
-        )
+        x = F.pad(x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0)
         x = self.resnet(x)
         x = self.regression_head(x)
         return x


-# @MODEL_REGISTRY.register()
-# class ResNet50(nn.Module):
-#     def __init__(self, model_args):
-#         super(ResNet50, self).__init__()
-#         self.num_classes = model_args.get("num_classes", 1)
-#         self.resnet = models.resnet50(weights=None, num_classes=self.num_classes)
-
-#     def forward(self, x, masks=None):
-#         return self.resnet(x)
-
-
-# @MODEL_REGISTRY.register()
-# class ResNet50(nn.Module):
-#     def __init__(self, model_args):
-#         super(ResNet50, self).__init__()
-#         self.num_classes = model_args.get("num_classes", 1)
-#         self.resnet = models.resnet50(weights=None, num_classes=self.num_classes)
-
-#     def forward(self, x, masks=None):
-#         # Calculate the padding dynamically based on the input size
-#         height, width = x.shape[2], x.shape[3]
-#         pad_height = max(0, (224 - height) // 2)
-#         pad_width = max(0, (224 - width) // 2)
-
-#         # Apply padding
-#         x = F.pad(
-#             x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0
-#         )
-#         x = self.resnet(x)
-#         return x
-
-
 @MODEL_REGISTRY.register()
 class ResNet50(nn.Module):
     def __init__(self, model_args):
@@ -113,9 +47,7 @@ class ResNet50(nn.Module):
         pad_width = max(0, (224 - width) // 2)

         # Apply padding
-        x = F.pad(
-            x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0
-        )
+        x = F.pad(x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0)
         x = self.resnet(x)
         x = self.regression_head(x)
         return x
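
Aside (not part of the commit): the one-line F.pad call keeps the original behavior, centering any input smaller than 224x224 on a zero canvas before the backbone. A quick sketch with an assumed 32x64 eye crop:

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 32, 64)             # batch of one 32x64 RGB eye crop
    height, width = x.shape[2], x.shape[3]
    pad_height = max(0, (224 - height) // 2)  # 96
    pad_width = max(0, (224 - width) // 2)    # 80

    # (left, right, top, bottom) zero-padding on the last two dimensions
    x = F.pad(x, (pad_width, pad_width, pad_height, pad_height), mode="constant", value=0)
    print(x.shape)                            # torch.Size([1, 3, 224, 224])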
video.py DELETED

@@ -1,48 +0,0 @@
-import cv2
-import torch
-
-# Load the video
-video_path = "path_to_video.mp4"
-cap = cv2.VideoCapture(video_path)
-
-# Video properties
-frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-fps = cap.get(cv2.CAP_PROP_FPS)
-
-# Create a VideoWriter object for the output video
-out = cv2.VideoWriter("output_with_CAM.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (frame_width, frame_height))
-
-# Process each frame
-while True:
-    ret, frame = cap.read()
-    if not ret:
-        break  # End of the video
-
-    # Detect landmarks for left and right eye bounding boxes (example)
-    left_eye_bbox = (x_left, y_left, width_left, height_left)
-    right_eye_bbox = (x_right, y_right, width_right, height_right)
-
-    # Crop the eyes
-    left_eye = frame[y_left : y_left + height_left, x_left : x_left + width_left]
-    right_eye = frame[y_right : y_right + height_right, x_right : x_right + width_right]
-
-    # Generate CAMs for left and right eyes
-    CAM_left = generate_CAM(left_eye)  # Use your model here
-    CAM_right = generate_CAM(right_eye)  # Use your model here
-
-    # Resize CAMs if necessary
-    CAM_left_resized = cv2.resize(CAM_left, (width_left, height_left))
-    CAM_right_resized = cv2.resize(CAM_right, (width_right, height_right))
-
-    # Overlay the CAMs onto the original frame
-    frame[y_left : y_left + height_left, x_left : x_left + width_left] = CAM_left_resized
-    frame[y_right : y_right + height_right, x_right : x_right + width_right] = CAM_right_resized
-
-    # Write the processed frame to the output video
-    out.write(frame)
-
-# Release resources
-cap.release()
-out.release()
-cv2.destroyAllWindows()