vijul.shah committed on
Commit
4b41e60
·
1 Parent(s): 9acc552

Frames Processing Optimized

Browse files
Files changed (3) hide show
  1. app.py +17 -6
  2. app_old.py +0 -434
  3. app_utils.py +64 -27
app.py CHANGED
@@ -56,7 +56,7 @@ def main():
56
  input_img = resize_frame(input_img, max_width=640, max_height=480)
57
  input_img = resize_frame(input_img, max_width=640, max_height=480)
58
  cols[0].image(input_img, use_column_width=True)
59
- input_img.save("out.jpg")
60
 
61
  elif is_video(file_extension):
62
  tfile = tempfile.NamedTemporaryFile(delete=False)
@@ -64,6 +64,12 @@ def main():
64
  video_path = tfile.name
65
  video_frames = extract_frames(video_path)
66
  cols[0].video(video_path)
 
 
 
 
 
 
67
 
68
  st.sidebar.title("Setup")
69
  pupil_selection = st.sidebar.selectbox(
@@ -79,11 +85,17 @@ def main():
79
 
80
  if is_image(file_extension):
81
  input_frames, output_frames, predicted_diameters, face_frames = process_frames(
82
- [input_img], tv_model, pupil_selection, cam_method=CAM_METHODS[-1]
 
 
 
 
 
 
83
  )
84
- for ff in face_frames:
85
- if ff["has_face"]:
86
- cols[1].image(face_frames[0]["img"], use_column_width=True)
87
 
88
  input_frames_keys = input_frames.keys()
89
  video_cols = cols[1].columns(len(input_frames_keys))
@@ -106,7 +118,6 @@ def main():
106
  process_video(
107
  cols, video_frames, tv_model, pupil_selection, output_video_path, cam_method=CAM_METHODS[-1]
108
  )
109
-
110
  os.remove(video_path)
111
 
112
 
 
56
  input_img = resize_frame(input_img, max_width=640, max_height=480)
57
  input_img = resize_frame(input_img, max_width=640, max_height=480)
58
  cols[0].image(input_img, use_column_width=True)
59
+ st.session_state.total_frames = 1
60
 
61
  elif is_video(file_extension):
62
  tfile = tempfile.NamedTemporaryFile(delete=False)
 
64
  video_path = tfile.name
65
  video_frames = extract_frames(video_path)
66
  cols[0].video(video_path)
67
+ st.session_state.total_frames = len(video_frames)
68
+
69
+ st.session_state.current_frame = 0
70
+ st.session_state.frame_placeholder = cols[0].empty()
71
+ txt = f"<p style='font-size:20px;'> Number of Frames Processed: <strong>{st.session_state.current_frame} / {st.session_state.total_frames}</strong> </p>"
72
+ st.session_state.frame_placeholder.markdown(txt, unsafe_allow_html=True)
73
 
74
  st.sidebar.title("Setup")
75
  pupil_selection = st.sidebar.selectbox(
 
85
 
86
  if is_image(file_extension):
87
  input_frames, output_frames, predicted_diameters, face_frames = process_frames(
88
+ cols,
89
+ [input_img],
90
+ tv_model,
91
+ pupil_selection,
92
+ cam_method=CAM_METHODS[-1],
93
+ output_path=None,
94
+ codec=None,
95
  )
96
+ # for ff in face_frames:
97
+ # if ff["has_face"]:
98
+ # cols[1].image(face_frames[0]["img"], use_column_width=True)
99
 
100
  input_frames_keys = input_frames.keys()
101
  video_cols = cols[1].columns(len(input_frames_keys))
 
118
  process_video(
119
  cols, video_frames, tv_model, pupil_selection, output_video_path, cam_method=CAM_METHODS[-1]
120
  )
 
121
  os.remove(video_path)
122
 
123
 
app_old.py DELETED
@@ -1,434 +0,0 @@
1
- # taken from: https://huggingface.co/spaces/frgfm/torch-cam/blob/main/app.py
2
-
3
- # streamlit run app.py
4
- from io import BytesIO
5
- import os
6
- import sys
7
- import cv2
8
- import matplotlib.pyplot as plt
9
- import numpy as np
10
- import streamlit as st
11
- import torch
12
- import tempfile
13
- from PIL import Image
14
- from torchvision import models
15
- from torchvision.transforms.functional import normalize, resize, to_pil_image, to_tensor
16
- from torchvision import transforms
17
-
18
- from torchcam.methods import CAM
19
- from torchcam import methods as torchcam_methods
20
- from torchcam.utils import overlay_mask
21
- import os.path as osp
22
-
23
- root_path = osp.abspath(osp.join(__file__, osp.pardir))
24
- sys.path.append(root_path)
25
-
26
- from preprocessing.dataset_creation import EyeDentityDatasetCreation
27
- from utils import get_model
28
- from registry_utils import import_registered_modules
29
-
30
- import_registered_modules()
31
- # from torchcam.methods._utils import locate_candidate_layer
32
-
33
- CAM_METHODS = [
34
- "CAM",
35
- # "GradCAM",
36
- # "GradCAMpp",
37
- # "SmoothGradCAMpp",
38
- # "ScoreCAM",
39
- # "SSCAM",
40
- # "ISCAM",
41
- # "XGradCAM",
42
- # "LayerCAM",
43
- ]
44
- TV_MODELS = [
45
- "ResNet18",
46
- "ResNet50",
47
- ]
48
- SR_METHODS = ["GFPGAN", "CodeFormer", "RealESRGAN", "SRResNet", "HAT"]
49
- UPSCALE = [2, 4]
50
- UPSCALE_METHODS = ["BILINEAR", "BICUBIC"]
51
- LABEL_MAP = ["left_pupil", "right_pupil"]
52
-
53
-
54
- @torch.no_grad()
55
- def _load_model(model_configs, device="cpu"):
56
- model_path = os.path.join(root_path, model_configs["model_path"])
57
- model_configs.pop("model_path")
58
- model_dict = torch.load(model_path, map_location=device)
59
- model = get_model(model_configs=model_configs)
60
- model.load_state_dict(model_dict)
61
- model = model.to(device)
62
- model = model.eval()
63
- return model
64
-
65
-
66
- def extract_frames(video_path):
67
- vidcap = cv2.VideoCapture(video_path)
68
- frames = []
69
- success, image = vidcap.read()
70
- count = 0
71
- while success:
72
- # Convert the frame to RGB (cv2 uses BGR by default)
73
- image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
74
- frames.append(image_rgb)
75
- success, image = vidcap.read()
76
- count += 1
77
- vidcap.release()
78
- return frames
79
-
80
-
81
- # Function to check if a file is an image
82
- def is_image(file_extension):
83
- return file_extension.lower() in ["png", "jpeg", "jpg"]
84
-
85
-
86
- # Function to check if a file is a video
87
- def is_video(file_extension):
88
- return file_extension.lower() in ["mp4", "avi", "mov", "mkv", "webm"]
89
-
90
-
91
- def resize_frame(frame, max_width, max_height):
92
- image = Image.fromarray(frame)
93
- original_size = image.size
94
-
95
- # Resize the frame similarly to the image resizing logic
96
- if original_size[0] == original_size[1] and original_size[0] >= 256:
97
- max_size = (256, 256)
98
- else:
99
- max_size = list(original_size)
100
- if original_size[0] >= 640:
101
- max_size[0] = 640
102
- elif original_size[0] < 64:
103
- max_size[0] = 64
104
- if original_size[1] >= 480:
105
- max_size[1] = 480
106
- elif original_size[1] < 32:
107
- max_size[1] = 32
108
-
109
- image.thumbnail(max_size)
110
- return image
111
-
112
-
113
- def main():
114
- # Wide mode
115
- st.set_page_config(page_title="Pupil Diameter Estimator", layout="wide")
116
-
117
- # Designing the interface
118
- st.title("EyeDentify Playground")
119
- # For newline
120
- st.write("\n")
121
- # Set the columns
122
- cols = st.columns((1, 1))
123
- # cols = st.columns((1, 1, 1))
124
- cols[0].header("Input image")
125
- # cols[1].header("Raw CAM")
126
- cols[-1].header("Prediction")
127
-
128
- # Sidebar
129
- # File selection
130
- st.sidebar.title("Upload Face or Eye")
131
- # Disabling warning
132
- st.set_option("deprecation.showfileUploaderEncoding", False)
133
- # Choose your own image
134
- uploaded_file = st.sidebar.file_uploader(
135
- "Upload Image or Video", type=["png", "jpeg", "jpg", "mp4", "avi", "mov", "mkv", "webm"]
136
- )
137
- if uploaded_file is not None:
138
- # Get file extension
139
- file_extension = uploaded_file.name.split(".")[-1]
140
- input_imgs = []
141
-
142
- if is_image(file_extension):
143
- input_img = Image.open(BytesIO(uploaded_file.read()), mode="r").convert("RGB")
144
- # print("input_img before = ", input_img.size)
145
- max_size = [input_img.size[0], input_img.size[1]]
146
- cols[0].text(f"Input Image: {max_size[0]} x {max_size[1]}")
147
- if input_img.size[0] == input_img.size[1] and input_img.size[0] >= 256:
148
- max_size[0] = 256
149
- max_size[1] = 256
150
- else:
151
- if input_img.size[0] >= 640:
152
- max_size[0] = 640
153
- elif input_img.size[0] < 64:
154
- max_size[0] = 64
155
- if input_img.size[1] >= 480:
156
- max_size[1] = 480
157
- elif input_img.size[1] < 32:
158
- max_size[1] = 32
159
- input_img.thumbnail((max_size[0], max_size[1])) # Bicubic resampling
160
- input_imgs.append(input_img)
161
- # print("input_img after = ", input_img.size)
162
- # cols[0].image(input_img)
163
- fig0, axs0 = plt.subplots(1, 1, figsize=(10, 10))
164
- # Display the input image
165
- axs0.imshow(input_imgs[0])
166
- axs0.axis("off")
167
- axs0.set_title("Input Image")
168
-
169
- # Display the plot
170
- cols[0].pyplot(fig0)
171
- cols[0].text(f"Input Image Resized: {max_size[0]} x {max_size[1]}")
172
-
173
- # TODO: show the face features extracted from the image under 'input image' column
174
- elif is_video(file_extension):
175
- tfile = tempfile.NamedTemporaryFile(delete=False)
176
- tfile.write(uploaded_file.read())
177
- video_path = tfile.name
178
-
179
- # Extract frames from the video
180
- frames = extract_frames(video_path)
181
- print(f"Extracted {len(frames)} frames from the video")
182
-
183
- # Process the frames
184
- for i, frame in enumerate(frames):
185
- input_imgs.append(resize_frame(frame, 640, 480))
186
-
187
- os.remove(video_path)
188
-
189
- fig0, axs0 = plt.subplots(1, 1, figsize=(10, 10))
190
- # Display the input image
191
- axs0.imshow(input_imgs[0])
192
- axs0.axis("off")
193
- axs0.set_title("Input Image")
194
-
195
- # Display the plot
196
- cols[0].pyplot(fig0)
197
- # cols[0].text(f"Input Image Resized: {max_size[0]} x {max_size[1]}")
198
-
199
- st.sidebar.title("Setup")
200
-
201
- # Upscale selection
202
- upscale = "-"
203
- # upscale = st.sidebar.selectbox(
204
- # "Upscale",
205
- # ["-"] + UPSCALE,
206
- # help="Upscale the uploaded image 2 or 4 times. Keep blank for no upscaling",
207
- # )
208
-
209
- # Upscale method selection
210
- if upscale != "-":
211
- upscale_method_or_model = st.sidebar.selectbox(
212
- "Upscale Method / Model",
213
- UPSCALE_METHODS + SR_METHODS,
214
- help="Select a method or model to upscale the uploaded image",
215
- )
216
- else:
217
- upscale_method_or_model = None
218
-
219
- # Pupil selection
220
- pupil_selection = st.sidebar.selectbox(
221
- "Pupil Selection",
222
- ["-"] + LABEL_MAP,
223
- help="Select left or right pupil OR keep blank for both pupil diameter estimation",
224
- )
225
-
226
- # Model selection
227
- tv_model = st.sidebar.selectbox(
228
- "Classification model",
229
- TV_MODELS,
230
- help="Supported Models for Pupil Diameter Estimation",
231
- )
232
-
233
- cam_method = "CAM"
234
- # cam_method = st.sidebar.selectbox(
235
- # "CAM method",
236
- # CAM_METHODS,
237
- # help="The way your class activation map will be computed",
238
- # )
239
- # target_layer = st.sidebar.text_input(
240
- # "Target layer",
241
- # default_layer,
242
- # help='If you want to target several layers, add a "+" separator (e.g. "layer3+layer4")',
243
- # )
244
-
245
- st.sidebar.write("\n")
246
-
247
- if st.sidebar.button("Predict Diameter & Compute CAM"):
248
- if uploaded_file is None:
249
- st.sidebar.error("Please upload an image first")
250
-
251
- else:
252
- with st.spinner("Analyzing..."):
253
- model = None
254
- for input_img in input_imgs:
255
- if upscale == "-":
256
- sr_configs = None
257
- else:
258
- sr_configs = {
259
- "method": upscale_method_or_model,
260
- "params": {"upscale": upscale},
261
- }
262
- config_file = {
263
- "sr_configs": sr_configs,
264
- "feature_extraction_configs": {
265
- "blink_detection": False,
266
- "upscale": upscale,
267
- "extraction_library": "mediapipe",
268
- },
269
- }
270
-
271
- img = np.array(input_img)
272
- # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
273
- # if img.shape[0] > max_size or img.shape[1] > max_size:
274
- # img = cv2.resize(img, (max_size, max_size))
275
-
276
- ds_results = EyeDentityDatasetCreation(
277
- feature_extraction_configs=config_file["feature_extraction_configs"],
278
- sr_configs=config_file["sr_configs"],
279
- )(img)
280
-
281
- # if ds_results is not None:
282
- # print("ds_results = ", ds_results.keys())
283
- # NOTE:
284
- # ds_results.keys() contains ===> 'full_imgs', 'faces', 'eyes', 'blinks', 'iris'
285
-
286
- preprocess_steps = [
287
- transforms.ToTensor(),
288
- transforms.Resize(
289
- [32, 64],
290
- interpolation=transforms.InterpolationMode.BICUBIC,
291
- antialias=True,
292
- ),
293
- ]
294
- preprocess_function = transforms.Compose(preprocess_steps)
295
-
296
- left_eye = None
297
- right_eye = None
298
-
299
- if ds_results is None:
300
- # print("type of input_img = ", type(input_img))
301
- input_img = preprocess_function(input_img)
302
- input_img = input_img.unsqueeze(0)
303
- if pupil_selection == "left_pupil":
304
- left_eye = input_img
305
- elif pupil_selection == "right_pupil":
306
- right_eye = input_img
307
- else:
308
- left_eye = input_img
309
- right_eye = input_img
310
- # print("type of left_eye = ", type(left_eye))
311
- # print("type of right_eye = ", type(right_eye))
312
- elif "eyes" in ds_results.keys():
313
- if "left_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["left_eye"] is not None:
314
- left_eye = ds_results["eyes"]["left_eye"]
315
- # print("type of left_eye = ", type(left_eye))
316
- left_eye = to_pil_image(left_eye).convert("RGB")
317
- # print("type of left_eye = ", type(left_eye))
318
-
319
- left_eye = preprocess_function(left_eye)
320
- # print("type of left_eye = ", type(left_eye))
321
-
322
- left_eye = left_eye.unsqueeze(0)
323
- if "right_eye" in ds_results["eyes"].keys() and ds_results["eyes"]["right_eye"] is not None:
324
- right_eye = ds_results["eyes"]["right_eye"]
325
- # print("type of right_eye = ", type(right_eye))
326
- right_eye = to_pil_image(right_eye).convert("RGB")
327
- # print("type of right_eye = ", type(right_eye))
328
-
329
- right_eye = preprocess_function(right_eye)
330
- # print("type of right_eye = ", type(right_eye))
331
-
332
- right_eye = right_eye.unsqueeze(0)
333
- else:
334
- # print("type of input_img = ", type(input_img))
335
- input_img = preprocess_function(input_img)
336
- input_img = input_img.unsqueeze(0)
337
- if pupil_selection == "left_pupil":
338
- left_eye = input_img
339
- elif pupil_selection == "right_pupil":
340
- right_eye = input_img
341
- else:
342
- left_eye = input_img
343
- right_eye = input_img
344
- # print("type of left_eye = ", type(left_eye))
345
- # print("type of right_eye = ", type(right_eye))
346
-
347
- # print("left_eye = ", left_eye.shape)
348
- # print("right_eye = ", right_eye.shape)
349
-
350
- if pupil_selection == "-":
351
- selected_eyes = ["left_eye", "right_eye"]
352
- elif pupil_selection == "left_pupil":
353
- selected_eyes = ["left_eye"]
354
- elif pupil_selection == "right_pupil":
355
- selected_eyes = ["right_eye"]
356
-
357
- for eye_type in selected_eyes:
358
-
359
- if model is None:
360
- model_configs = {
361
- "model_path": root_path + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
362
- "registered_model_name": tv_model,
363
- "num_classes": 1,
364
- }
365
- registered_model_name = model_configs["registered_model_name"]
366
- model = _load_model(model_configs)
367
-
368
- if registered_model_name == "ResNet18":
369
- target_layer = model.resnet.layer4[-1].conv2
370
- elif registered_model_name == "ResNet50":
371
- target_layer = model.resnet.layer4[-1].conv3
372
- else:
373
- raise Exception(f"No target layer available for selected model: {registered_model_name}")
374
-
375
- if left_eye is not None and eye_type == "left_eye":
376
- input_img = left_eye
377
- elif right_eye is not None and eye_type == "right_eye":
378
- input_img = right_eye
379
- else:
380
- raise Exception("Wrong Data")
381
-
382
- if cam_method is not None:
383
- cam_extractor = torchcam_methods.__dict__[cam_method](
384
- model,
385
- target_layer=target_layer,
386
- fc_layer=model.resnet.fc,
387
- input_shape=input_img.shape,
388
- )
389
-
390
- # with torch.no_grad():
391
- out = model(input_img)
392
- cols[-1].markdown(
393
- f"<h3>Predicted Pupil Diameter: {out[0].item():.2f} mm</h3>",
394
- unsafe_allow_html=True,
395
- )
396
- # cols[-1].text(f"Predicted Pupil Diameter: {out[0].item():.2f}")
397
-
398
- # Retrieve the CAM
399
- act_maps = cam_extractor(0, out)
400
-
401
- # Fuse the CAMs if there are several
402
- activation_map = act_maps[0] if len(act_maps) == 1 else cam_extractor.fuse_cams(act_maps)
403
-
404
- # Convert input image and activation map to PIL images
405
- input_image_pil = to_pil_image(input_img.squeeze(0))
406
- activation_map_pil = to_pil_image(activation_map, mode="F")
407
-
408
- # Create the overlayed CAM result
409
- result = overlay_mask(
410
- input_image_pil,
411
- activation_map_pil,
412
- alpha=0.5,
413
- )
414
-
415
- # Create a subplot with 1 row and 2 columns
416
- fig, axs = plt.subplots(1, 2, figsize=(10, 5))
417
-
418
- # Display the input image
419
- axs[0].imshow(input_image_pil)
420
- axs[0].axis("off")
421
- axs[0].set_title("Input Image")
422
-
423
- # Display the overlayed CAM result
424
- axs[1].imshow(result)
425
- axs[1].axis("off")
426
- axs[1].set_title("Overlayed CAM")
427
-
428
- # Display the plot
429
- cols[-1].pyplot(fig)
430
- cols[-1].text(f"eye image size: {input_img.shape[-1]} x {input_img.shape[-2]}")
431
-
432
-
433
- if __name__ == "__main__":
434
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app_utils.py CHANGED
@@ -110,7 +110,7 @@ def overlay_text_on_frame(frame, text, position=(16, 20)):
110
  return cv2.putText(frame, text, position, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, cv2.LINE_AA)
111
 
112
 
113
- def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
114
  upscale = "-"
115
  upscale_method_or_model = "-"
116
  if upscale == "-":
@@ -144,7 +144,7 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
144
  elif pupil_selection == "right_pupil":
145
  selected_eyes = ["right_eye"]
146
 
147
- for eye_type in selected_eyes:
148
  model_configs = {
149
  "model_path": root_path + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
150
  "registered_model_name": tv_model,
@@ -163,6 +163,21 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
163
  input_frames[eye_type] = []
164
  predicted_diameters[eye_type] = []
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  ds_creation = EyeDentityDatasetCreation(
167
  feature_extraction_configs=config_file["feature_extraction_configs"],
168
  sr_configs=config_file["sr_configs"],
@@ -178,7 +193,7 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
178
  ]
179
  preprocess_function = transforms.Compose(preprocess_steps)
180
 
181
- for input_img in input_imgs:
182
 
183
  img = np.array(input_img)
184
  ds_results = ds_creation(img)
@@ -219,7 +234,7 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
219
  left_eye = input_img
220
  right_eye = input_img
221
 
222
- for eye_type in selected_eyes:
223
  if left_eye is not None and eye_type == "left_eye":
224
  if left_pupil_cam_extractor is None:
225
  if tv_model == "ResNet18":
@@ -269,11 +284,33 @@ def process_frames(input_imgs, tv_model, pupil_selection, cam_method):
269
  activation_map_pil = to_pil_image(activation_map, mode="F")
270
  result = overlay_mask(input_image_pil, activation_map_pil, alpha=0.5)
271
 
 
 
 
272
  # Add frame and predicted diameter to lists
273
- input_frames[eye_type].append(np.array(input_image_pil))
274
- output_frames[eye_type].append(np.array(result))
275
  predicted_diameters[eye_type].append(predicted_diameter)
276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  return input_frames, output_frames, predicted_diameters, face_frames
278
 
279
 
@@ -299,23 +336,7 @@ def get_codec_and_extension(file_format):
299
  return "MJPG", ".avi"
300
 
301
 
302
- def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method):
303
-
304
- resized_frames = []
305
- for i, frame in enumerate(video_frames):
306
- input_img = resize_frame(frame, max_width=640, max_height=480)
307
- # input_img = Image.fromarray(input_img)
308
- resized_frames.append(input_img)
309
-
310
- input_frames, output_frames, predicted_diameters, face_frames = process_frames(
311
- resized_frames, tv_model, pupil_selection, cam_method
312
- )
313
-
314
- file_format = output_path.split(".")[-1]
315
- codec, extension = get_codec_and_extension(file_format)
316
-
317
- video_cols = cols[1].columns(len(input_frames.keys()))
318
-
319
  for i, eye_type in enumerate(input_frames.keys()):
320
  in_frames = input_frames[eye_type]
321
  height, width, _ = in_frames[0].shape
@@ -329,10 +350,12 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
329
  with open(output_path, "rb") as video_file:
330
  video_bytes = video_file.read()
331
  video_base64 = base64.b64encode(video_bytes).decode("utf-8")
332
- display_video_with_autoplay(video_cols[i], video_base64)
333
 
334
  os.remove(output_path)
335
 
 
 
336
  for i, eye_type in enumerate(output_frames.keys()):
337
  out_frames = output_frames[eye_type]
338
  height, width, _ = out_frames[0].shape
@@ -346,10 +369,12 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
346
  with open(output_path, "rb") as video_file:
347
  video_bytes = video_file.read()
348
  video_base64 = base64.b64encode(video_bytes).decode("utf-8")
349
- display_video_with_autoplay(video_cols[i], video_base64)
350
 
351
  os.remove(output_path)
352
 
 
 
353
  for i, eye_type in enumerate(output_frames.keys()):
354
 
355
  out_frames = output_frames[eye_type]
@@ -368,7 +393,19 @@ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, ca
368
  with open(output_path, "rb") as video_file:
369
  video_bytes = video_file.read()
370
  video_base64 = base64.b64encode(video_bytes).decode("utf-8")
371
- display_video_with_autoplay(video_cols[i], video_base64)
 
372
  os.remove(output_path)
373
 
374
- return predicted_diameters
 
 
 
 
 
 
 
 
 
 
 
 
110
  return cv2.putText(frame, text, position, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, cv2.LINE_AA)
111
 
112
 
113
+ def process_frames(cols, input_imgs, tv_model, pupil_selection, cam_method, output_path, codec):
114
  upscale = "-"
115
  upscale_method_or_model = "-"
116
  if upscale == "-":
 
144
  elif pupil_selection == "right_pupil":
145
  selected_eyes = ["right_eye"]
146
 
147
+ for i, eye_type in enumerate(selected_eyes):
148
  model_configs = {
149
  "model_path": root_path + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
150
  "registered_model_name": tv_model,
 
163
  input_frames[eye_type] = []
164
  predicted_diameters[eye_type] = []
165
 
166
+ if output_path:
167
+ video_cols = cols[1].columns(len(input_frames.keys()))
168
+
169
+ video_input_placeholders = {}
170
+ for i, eye_type in enumerate(list(input_frames.keys())):
171
+ video_input_placeholders[eye_type] = video_cols[i].empty()
172
+
173
+ video_output_placeholders = {}
174
+ for i, eye_type in enumerate(list(input_frames.keys())):
175
+ video_output_placeholders[eye_type] = video_cols[i].empty()
176
+
177
+ video_predictions_placeholders = {}
178
+ for i, eye_type in enumerate(list(input_frames.keys())):
179
+ video_predictions_placeholders[eye_type] = video_cols[i].empty()
180
+
181
  ds_creation = EyeDentityDatasetCreation(
182
  feature_extraction_configs=config_file["feature_extraction_configs"],
183
  sr_configs=config_file["sr_configs"],
 
193
  ]
194
  preprocess_function = transforms.Compose(preprocess_steps)
195
 
196
+ for idx, input_img in enumerate(input_imgs):
197
 
198
  img = np.array(input_img)
199
  ds_results = ds_creation(img)
 
234
  left_eye = input_img
235
  right_eye = input_img
236
 
237
+ for i, eye_type in enumerate(selected_eyes):
238
  if left_eye is not None and eye_type == "left_eye":
239
  if left_pupil_cam_extractor is None:
240
  if tv_model == "ResNet18":
 
284
  activation_map_pil = to_pil_image(activation_map, mode="F")
285
  result = overlay_mask(input_image_pil, activation_map_pil, alpha=0.5)
286
 
287
+ input_img_np = np.array(input_image_pil)
288
+ output_img_np = np.array(result)
289
+
290
  # Add frame and predicted diameter to lists
291
+ input_frames[eye_type].append(input_img_np)
292
+ output_frames[eye_type].append(output_img_np)
293
  predicted_diameters[eye_type].append(predicted_diameter)
294
 
295
+ if output_path:
296
+ height, width, _ = output_img_np.shape
297
+ frame = np.zeros((height, width, 3), dtype=np.uint8)
298
+ text = f"{predicted_diameter:.2f}"
299
+ frame = overlay_text_on_frame(frame, text)
300
+
301
+ video_input_placeholders[eye_type].image(input_img_np, use_column_width=True)
302
+ video_output_placeholders[eye_type].image(output_img_np, use_column_width=True)
303
+ video_predictions_placeholders[eye_type].image(frame, use_column_width=True)
304
+
305
+ st.session_state.current_frame = idx + 1
306
+ txt = f"<p style='font-size:20px;'> Number of Frames Processed: <strong>{st.session_state.current_frame} / {st.session_state.total_frames}</strong> </p>"
307
+ st.session_state.frame_placeholder.markdown(txt, unsafe_allow_html=True)
308
+
309
+ if output_path:
310
+ show_input_frames(input_frames, output_path, codec, video_input_placeholders)
311
+ show_cam_frames(output_frames, output_path, codec, video_output_placeholders)
312
+ show_pred_text_frames(output_frames, output_path, predicted_diameters, codec, video_predictions_placeholders)
313
+
314
  return input_frames, output_frames, predicted_diameters, face_frames
315
 
316
 
 
336
  return "MJPG", ".avi"
337
 
338
 
339
+ def show_input_frames(input_frames, output_path, codec, video_cols):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  for i, eye_type in enumerate(input_frames.keys()):
341
  in_frames = input_frames[eye_type]
342
  height, width, _ = in_frames[0].shape
 
350
  with open(output_path, "rb") as video_file:
351
  video_bytes = video_file.read()
352
  video_base64 = base64.b64encode(video_bytes).decode("utf-8")
353
+ display_video_with_autoplay(video_cols[eye_type], video_base64)
354
 
355
  os.remove(output_path)
356
 
357
+
358
+ def show_cam_frames(output_frames, output_path, codec, video_cols):
359
  for i, eye_type in enumerate(output_frames.keys()):
360
  out_frames = output_frames[eye_type]
361
  height, width, _ = out_frames[0].shape
 
369
  with open(output_path, "rb") as video_file:
370
  video_bytes = video_file.read()
371
  video_base64 = base64.b64encode(video_bytes).decode("utf-8")
372
+ display_video_with_autoplay(video_cols[eye_type], video_base64)
373
 
374
  os.remove(output_path)
375
 
376
+
377
+ def show_pred_text_frames(output_frames, output_path, predicted_diameters, codec, video_cols):
378
  for i, eye_type in enumerate(output_frames.keys()):
379
 
380
  out_frames = output_frames[eye_type]
 
393
  with open(output_path, "rb") as video_file:
394
  video_bytes = video_file.read()
395
  video_base64 = base64.b64encode(video_bytes).decode("utf-8")
396
+ display_video_with_autoplay(video_cols[eye_type], video_base64)
397
+
398
  os.remove(output_path)
399
 
400
+
401
+ def process_video(cols, video_frames, tv_model, pupil_selection, output_path, cam_method):
402
+
403
+ resized_frames = []
404
+ for i, frame in enumerate(video_frames):
405
+ input_img = resize_frame(frame, max_width=640, max_height=480)
406
+ resized_frames.append(input_img)
407
+
408
+ file_format = output_path.split(".")[-1]
409
+ codec, extension = get_codec_and_extension(file_format)
410
+
411
+ process_frames(cols, resized_frames, tv_model, pupil_selection, cam_method, output_path, codec)