shayan5422
committed on
Upload 12 files
- .gitattributes +3 -0
- best_model_sequences.keras +3 -0
- data_preprocessing_sequences.py +174 -0
- dataset_preparation_sequences.py +116 -0
- dataset_sequences.pkl +3 -0
- final_model_sequences.keras +3 -0
- frame_extraction.py +61 -0
- history_sequences.pkl +3 -0
- model_building_sequences.py +110 -0
- model_evaluation_sequences.py +123 -0
- prediction_sequences.py +284 -0
- shape_predictor_68_face_landmarks.dat +3 -0
- video_capture.py +62 -0
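
For orientation, the uploaded scripts form a pipeline: record labelled webcam clips, extract frames, crop eye/eyebrow ROIs, build the padded dataset, train the CNN-LSTM, evaluate it, and run real-time prediction. A minimal sketch of the intended order, assuming the default directory names used in the scripts:

# Editor's sketch of the intended order (assumes the default paths below).
from frame_extraction import process_all_videos
from data_preprocessing_sequences import preprocess_dataset

# 1. Record clips with video_capture.py (writes videos/<label>_<name>.avi).
# 2. Split each video into per-label frame directories.
process_all_videos(videos_dir='videos', frames_dir='frames')
# 3. Crop the eye/eyebrow ROIs; preprocess_dataset expects dataset/<label>/<sequence>/,
#    so the frame folders are assumed to be arranged that way first.
preprocess_dataset(dataset_dir='dataset', output_dir='preprocessed_sequences')
# 4. Then run, in order:
#    python dataset_preparation_sequences.py   # pad, split, pickle
#    python model_building_sequences.py        # train the CNN-LSTM
#    python model_evaluation_sequences.py      # metrics and plots
#    python prediction_sequences.py            # real-time webcam demo
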
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+best_model_sequences.keras filter=lfs diff=lfs merge=lfs -text
+final_model_sequences.keras filter=lfs diff=lfs merge=lfs -text
+shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
best_model_sequences.keras
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:80f527be8dbdcfb36488d84cecad86368a1c8c4f618eafd204e6f23bcbef42ed
size 342613105
data_preprocessing_sequences.py
ADDED
@@ -0,0 +1,174 @@
# data_preprocessing_sequences.py

import os
import cv2
import dlib
import numpy as np
from imutils import face_utils
from tqdm import tqdm
import pickle

def get_facial_landmarks(detector, predictor, image):
    """
    Detects facial landmarks in an image.

    Args:
        detector: dlib face detector.
        predictor: dlib shape predictor.
        image (numpy.ndarray): Input image.

    Returns:
        dict: Coordinates of eyes and eyebrows.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 1)

    if len(rects) == 0:
        return None  # No face detected

    # Assuming the first detected face is the target
    rect = rects[0]
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)

    landmarks = {}
    # Define landmarks for left and right eyes and eyebrows
    landmarks['left_eye'] = shape[36:42]       # Left eye landmarks
    landmarks['right_eye'] = shape[42:48]      # Right eye landmarks
    landmarks['left_eyebrow'] = shape[17:22]   # Left eyebrow landmarks
    landmarks['right_eyebrow'] = shape[22:27]  # Right eyebrow landmarks

    return landmarks

def extract_roi(image, landmarks, region='left_eye', padding=5):
    """
    Extracts a region of interest (ROI) from the image based on landmarks.

    Args:
        image (numpy.ndarray): Input image.
        landmarks (dict): Facial landmarks.
        region (str): Region to extract ('left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow').
        padding (int): Padding around the ROI.

    Returns:
        numpy.ndarray: Extracted ROI.
    """
    points = landmarks.get(region)
    if points is None:
        return None

    # Compute the bounding box
    x, y, w, h = cv2.boundingRect(points)
    x = max(x - padding, 0)
    y = max(y - padding, 0)
    w = w + 2 * padding
    h = h + 2 * padding

    roi = image[y:y+h, x:x+w]
    return roi

def preprocess_video_sequence(sequence_dir, detector, predictor, img_size=(64, 64)):
    """
    Preprocesses a sequence of frames from a video.

    Args:
        sequence_dir (str): Directory containing frames of a video.
        detector: dlib face detector.
        predictor: dlib shape predictor.
        img_size (tuple): Desired image size for ROIs.

    Returns:
        list: List of preprocessed frames as numpy arrays.
    """
    frames = sorted([f for f in os.listdir(sequence_dir) if f.endswith('.jpg') or f.endswith('.png')])
    preprocessed_sequence = []

    for frame_name in frames:
        frame_path = os.path.join(sequence_dir, frame_name)
        image = cv2.imread(frame_path)
        if image is None:
            continue

        landmarks = get_facial_landmarks(detector, predictor, image)
        if landmarks is None:
            continue  # Skip frames with no detected face

        # Extract ROIs for eyes and eyebrows
        rois = {}
        rois['left_eye'] = extract_roi(image, landmarks, 'left_eye')
        rois['right_eye'] = extract_roi(image, landmarks, 'right_eye')
        rois['left_eyebrow'] = extract_roi(image, landmarks, 'left_eyebrow')
        rois['right_eyebrow'] = extract_roi(image, landmarks, 'right_eyebrow')

        # Process ROIs
        roi_images = []
        for region in ['left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow']:
            roi = rois.get(region)
            if roi is not None:
                roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
                roi = cv2.resize(roi, img_size)
                roi = roi.astype('float32') / 255.0  # Normalize to [0,1]
                roi = np.expand_dims(roi, axis=-1)  # Add channel dimension
                roi_images.append(roi)

        if len(roi_images) == 0:
            continue  # Skip if no ROIs were extracted

        # Concatenate ROIs horizontally to form a single image
        combined_roi = np.hstack(roi_images)
        preprocessed_sequence.append(combined_roi)

    return preprocessed_sequence

def preprocess_dataset(dataset_dir='dataset', output_dir='preprocessed_sequences', img_size=(64, 64)):
    """
    Preprocesses the entire dataset by processing each video sequence.

    Args:
        dataset_dir (str): Directory containing labeled data.
        output_dir (str): Directory to save preprocessed sequences.
        img_size (tuple): Desired image size for ROIs.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Initialize dlib's face detector and landmark predictor
    detector = dlib.get_frontal_face_detector()
    predictor_path = 'shape_predictor_68_face_landmarks.dat'

    if not os.path.exists(predictor_path):
        print(f"Error: {predictor_path} not found. Download it from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        return

    predictor = dlib.shape_predictor(predictor_path)

    classes = os.listdir(dataset_dir)
    for cls in classes:
        cls_path = os.path.join(dataset_dir, cls)
        if not os.path.isdir(cls_path):
            continue
        output_cls_dir = os.path.join(output_dir, cls)
        if not os.path.exists(output_cls_dir):
            os.makedirs(output_cls_dir)

        print(f"Processing class: {cls}")
        sequences = os.listdir(cls_path)
        for seq in tqdm(sequences, desc=f"Class {cls}"):
            seq_path = os.path.join(cls_path, seq)
            if not os.path.isdir(seq_path):
                continue
            preprocessed_sequence = preprocess_video_sequence(seq_path, detector, predictor, img_size=img_size)
            if len(preprocessed_sequence) == 0:
                continue  # Skip sequences with no valid frames

            # Stack frames to form a 3D array (frames, height, width, channels)
            sequence_array = np.stack(preprocessed_sequence, axis=0)

            # Save the preprocessed sequence as a numpy file
            npy_filename = os.path.join(output_cls_dir, f"{seq}.npy")
            np.save(npy_filename, sequence_array)

    print("Data preprocessing completed.")

if __name__ == "__main__":
    preprocess_dataset(dataset_dir='dataset', output_dir='preprocessed_sequences', img_size=(64, 64))
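
Each preprocessed frame is the four 64x64 ROIs stacked side by side, so every saved sequence is an array of shape (frames, 64, 256, 1) with values in [0, 1]. A quick sanity check on a hypothetical output file:

import numpy as np

# Hypothetical path; substitute any .npy produced by preprocess_dataset().
seq = np.load('preprocessed_sequences/upward_eyebrow/movement1.npy')
print(seq.shape)                        # (num_frames, 64, 256, 1)
print(seq.dtype, seq.min(), seq.max())  # float32, values normalized to [0, 1]
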
dataset_preparation_sequences.py
ADDED
@@ -0,0 +1,116 @@
# dataset_preparation_sequences.py

import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import pickle

def load_sequences(preprocessed_dir='preprocessed_sequences'):
    """
    Loads preprocessed sequences and their labels.

    Args:
        preprocessed_dir (str): Directory containing preprocessed sequences.

    Returns:
        tuple: Lists of sequences and labels, label mapping dictionary.
    """
    X = []
    y = []
    label_map = {}
    classes = sorted(os.listdir(preprocessed_dir))

    for idx, cls in enumerate(classes):
        label_map[cls] = idx
        cls_path = os.path.join(preprocessed_dir, cls)
        if not os.path.isdir(cls_path):
            continue
        sequence_files = [f for f in os.listdir(cls_path) if f.endswith('.npy')]
        for seq_file in sequence_files:
            seq_path = os.path.join(cls_path, seq_file)
            sequence = np.load(seq_path)
            X.append(sequence)
            y.append(idx)

    # X remains a list of numpy arrays with varying shapes
    y = np.array(y)
    y = to_categorical(y, num_classes=len(label_map))

    return X, y, label_map

def pad_sequences_fixed(X, max_seq_length):
    """
    Pads or truncates sequences to a fixed length.

    Args:
        X (list of numpy.ndarray): List of sequences with shape (frames, height, width, channels).
        max_seq_length (int): Desired sequence length.

    Returns:
        numpy.ndarray: Padded/truncated sequences.
    """
    padded_X = []
    for seq in X:
        if seq.shape[0] < max_seq_length:
            pad_width = max_seq_length - seq.shape[0]
            padding = np.zeros((pad_width, *seq.shape[1:]), dtype=seq.dtype)
            padded_seq = np.concatenate((seq, padding), axis=0)
        else:
            padded_seq = seq[:max_seq_length]
        padded_X.append(padded_seq)
    return np.array(padded_X)

def save_dataset(X_train, X_test, y_train, y_test, label_map, output_path='dataset_sequences.pkl'):
    """
    Saves the dataset into a pickle file.

    Args:
        X_train, X_test, y_train, y_test: Split data.
        label_map (dict): Mapping from class names to indices.
        output_path (str): Path to save the pickle file.
    """
    with open(output_path, 'wb') as f:
        pickle.dump({
            'X_train': X_train,
            'X_test': X_test,
            'y_train': y_train,
            'y_test': y_test,
            'label_map': label_map
        }, f)
    print(f"Dataset saved to {output_path}.")

def load_dataset_pickle(pickle_path='dataset_sequences.pkl'):
    """
    Loads the dataset from a pickle file.

    Args:
        pickle_path (str): Path to the pickle file.

    Returns:
        tuple: Split data and label mapping.
    """
    with open(pickle_path, 'rb') as f:
        data = pickle.load(f)
    return data['X_train'], data['X_test'], data['y_train'], data['y_test'], data['label_map']

if __name__ == "__main__":
    # Load sequences
    X, y, label_map = load_sequences(preprocessed_dir='preprocessed_sequences')
    print(f"Total samples: {len(X)}")

    # Find the maximum sequence length for padding
    max_seq_length = max([seq.shape[0] for seq in X])
    print(f"Maximum sequence length: {max_seq_length}")

    # Pad sequences to have the same length
    X_padded = pad_sequences_fixed(X, max_seq_length)
    print(f"Padded sequences shape: {X_padded.shape}")

    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Testing samples: {X_test.shape[0]}")

    # Save the dataset
    save_dataset(X_train, X_test, y_train, y_test, label_map, output_path='dataset_sequences.pkl')
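
To confirm what was written, the pickle can be reloaded with the module's own helper:

from dataset_preparation_sequences import load_dataset_pickle

X_train, X_test, y_train, y_test, label_map = load_dataset_pickle('dataset_sequences.pkl')
print(X_train.shape)  # (samples, max_seq_length, 64, 256, 1)
print(y_train.shape)  # (samples, num_classes) -- one-hot labels
print(label_map)      # e.g. {'downward_eyebrow': 0, 'upward_eyebrow': 1, ...}
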
dataset_sequences.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:abbe955e05fb92417fc7bc27e88998dcf0c311b06bc0803d47483b9d844893e7
size 196612009
final_model_sequences.keras
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:757fd4422e6703b3a4d9730edaee6643be3e1e32fefe71520d3b2bb341e62be3
size 342613105
frame_extraction.py
ADDED
@@ -0,0 +1,61 @@
# frame_extraction.py

import cv2
import os
from tqdm import tqdm

def extract_frames(video_path, output_dir='frames', prefix='frame'):
    """
    Extracts frames from a video file.

    Args:
        video_path (str): Path to the input video file.
        output_dir (str): Directory to save extracted frames.
        prefix (str): Prefix for the frame filenames.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}.")
        return

    frame_count = 0
    frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    for _ in tqdm(range(frame_total), desc=f"Extracting frames from {os.path.basename(video_path)}"):
        ret, frame = cap.read()
        if not ret:
            break
        frame_filename = os.path.join(output_dir, f"{prefix}_frame_{frame_count}.jpg")
        cv2.imwrite(frame_filename, frame)  # Save frame as JPEG file
        frame_count += 1

    cap.release()
    print(f"Extracted {frame_count} frames from {video_path}.")

def process_all_videos(videos_dir='videos', frames_dir='frames'):
    """
    Processes all videos in the specified directory and extracts frames.

    Args:
        videos_dir (str): Directory containing video files.
        frames_dir (str): Directory to save extracted frames.
    """
    if not os.path.exists(frames_dir):
        os.makedirs(frames_dir)

    video_files = [f for f in os.listdir(videos_dir) if f.endswith('.avi') or f.endswith('.mp4')]

    for video_file in video_files:
        label = video_file.split('_')[0]  # Assuming filename format 'label_something.avi'
        video_path = os.path.join(videos_dir, video_file)
        output_subdir = os.path.join(frames_dir, label)
        if not os.path.exists(output_subdir):
            os.makedirs(output_subdir)
        extract_frames(video_path, output_dir=output_subdir, prefix=video_file.split('.')[0])

if __name__ == "__main__":
    process_all_videos(videos_dir='videos', frames_dir='frames')
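
A single video can also be processed directly. Note that process_all_videos takes everything before the first '_' in the filename as the class label, so multi-word labels such as 'upward_eyebrow' would be truncated to 'upward' by this convention. A usage sketch with a hypothetical file name:

from frame_extraction import extract_frames

# Hypothetical recording; frames are written to frames/raise/ with the video's
# basename used as the filename prefix.
extract_frames('videos/raise_take1.avi', output_dir='frames/raise', prefix='raise_take1')
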
history_sequences.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cb39ac8c6291f9fccb70c39c7c9eab41d6c3499fe3cccdfcc88c51231fd07440
size 1876
model_building_sequences.py
ADDED
@@ -0,0 +1,110 @@
# model_building_sequences.py

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, TimeDistributed, LSTM, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import pickle

def build_cnn_lstm_model(input_shape, num_classes):
    """
    Builds a CNN-LSTM model for sequence classification.

    Args:
        input_shape (tuple): Shape of input sequences (frames, height, width, channels).
        num_classes (int): Number of output classes.

    Returns:
        tensorflow.keras.Model: Compiled model.
    """
    model = Sequential()

    # Apply Conv2D to each frame in the sequence
    model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=input_shape))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(BatchNormalization()))

    # Additional Conv2D layers
    model.add(TimeDistributed(Conv2D(64, (3, 3), activation='relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(BatchNormalization()))

    # Flatten the output from Conv layers
    model.add(TimeDistributed(Flatten()))

    # LSTM layer to capture temporal dependencies
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(0.5))

    # Fully connected layer
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    # Output layer with softmax activation for classification
    model.add(Dense(num_classes, activation='softmax'))

    # Compile the model with Adam optimizer and categorical cross-entropy loss
    model.compile(optimizer=Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

def load_dataset_pickle(pickle_path='dataset_sequences.pkl'):
    """
    Loads the dataset from a pickle file.

    Args:
        pickle_path (str): Path to the pickle file.

    Returns:
        tuple: Split data and label mapping.
    """
    with open(pickle_path, 'rb') as f:
        data = pickle.load(f)
    return data['X_train'], data['X_test'], data['y_train'], data['y_test'], data['label_map']

def main():
    # Load the dataset
    X_train, X_test, y_train, y_test, label_map = load_dataset_pickle('dataset_sequences.pkl')
    num_classes = y_train.shape[1]
    input_shape = X_train.shape[1:]  # (frames, height, width, channels)

    # Build the CNN-LSTM model
    model = build_cnn_lstm_model(input_shape, num_classes)
    model.summary()

    # Define callbacks with updated filepath (.keras)
    checkpoint = ModelCheckpoint(
        'best_model_sequences.keras',  # Changed from .h5 to .keras
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    )
    early_stop = EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        restore_best_weights=True
    )

    # Train the model using GPU
    with tf.device('/GPU:0'):
        history = model.fit(
            X_train, y_train,
            epochs=50,
            batch_size=128,  # Adjust based on your system's memory
            validation_data=(X_test, y_test),
            callbacks=[checkpoint, early_stop]  # Pass the callbacks so checkpointing and early stopping take effect
        )

    # Save the final trained model with .keras extension
    model.save('final_model_sequences.keras')  # Changed from .h5 to .keras
    print("Model training completed and saved as 'final_model_sequences.keras'.")

    # Save training history for future reference
    with open('history_sequences.pkl', 'wb') as f:
        pickle.dump(history.history, f)
    print("Training history saved as 'history_sequences.pkl'.")

if __name__ == "__main__":
    main()
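
With the preprocessing defaults (four 64x64 ROIs concatenated per frame), each input frame is 64x256x1, so the model sees sequences of shape (frames, 64, 256, 1). A smoke test on random data, assuming a hypothetical 20-frame sequence length and 4 classes:

import numpy as np
from model_building_sequences import build_cnn_lstm_model

# Hypothetical shapes: 20 frames per sequence, 4 movement classes.
model = build_cnn_lstm_model(input_shape=(20, 64, 256, 1), num_classes=4)
dummy = np.random.rand(2, 20, 64, 256, 1).astype('float32')
print(model.predict(dummy).shape)  # (2, 4) -- softmax probabilities per class
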
model_evaluation_sequences.py
ADDED
@@ -0,0 +1,123 @@
# model_evaluation_sequences.py

import tensorflow as tf
import pickle
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

def load_model(model_path='best_model_sequences.keras'):
    """
    Loads the trained model.

    Args:
        model_path (str): Path to the saved model.

    Returns:
        tensorflow.keras.Model: Loaded model.
    """
    model = tf.keras.models.load_model(model_path)
    return model

def load_dataset_pickle(pickle_path='dataset_sequences.pkl'):
    """
    Loads the dataset from a pickle file.

    Args:
        pickle_path (str): Path to the pickle file.

    Returns:
        tuple: Split data and label mapping.
    """
    with open(pickle_path, 'rb') as f:
        data = pickle.load(f)
    return data['X_train'], data['X_test'], data['y_train'], data['y_test'], data['label_map']

def plot_history(history):
    """
    Plots the training and validation accuracy and loss.

    Args:
        history (dict): Training history.
    """
    acc = history.get('accuracy', history.get('acc'))
    val_acc = history.get('val_accuracy', history.get('val_acc'))

    loss = history['loss']
    val_loss = history['val_loss']

    epochs = range(1, len(acc) + 1)

    plt.figure(figsize=(14, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epochs, acc, 'b', label='Training accuracy')
    plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, loss, 'b', label='Training loss')
    plt.plot(epochs, val_loss, 'r', label='Validation loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.show()

def evaluate_model(model, X_test, y_test, label_map):
    """
    Evaluates the model on the test set.

    Args:
        model (tensorflow.keras.Model): Trained model.
        X_test (numpy.ndarray): Test sequences.
        y_test (numpy.ndarray): Test labels.
        label_map (dict): Mapping from class names to indices.
    """
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    print(f"Test Loss: {loss:.4f}")

    # Predictions
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred_classes)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=label_map.keys(), yticklabels=label_map.keys(), cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_true, y_pred_classes, target_names=label_map.keys()))

def main():
    # Load the trained model
    model = load_model('best_model_sequences.keras')

    # Load the dataset
    X_train, X_test, y_train, y_test, label_map = load_dataset_pickle('dataset_sequences.pkl')

    # Evaluate the model
    evaluate_model(model, X_test, y_test, label_map)

    # Load and plot training history
    try:
        with open('history_sequences.pkl', 'rb') as f:
            history = pickle.load(f)
        plot_history(history)
    except FileNotFoundError:
        print("Training history not found. Skipping plotting.")

if __name__ == "__main__":
    main()
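
The evaluation helpers can also be driven from an interactive session; a minimal sketch using the functions above:

from model_evaluation_sequences import load_model, load_dataset_pickle, evaluate_model

# Evaluate the best checkpoint on the held-out split saved by dataset_preparation_sequences.py
# (prints accuracy/loss and the classification report, and shows the confusion matrix).
model = load_model('best_model_sequences.keras')
_, X_test, _, y_test, label_map = load_dataset_pickle('dataset_sequences.pkl')
evaluate_model(model, X_test, y_test, label_map)
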
prediction_sequences.py
ADDED
@@ -0,0 +1,284 @@
# prediction_sequences.py

import tensorflow as tf
import cv2
import numpy as np
import dlib
from imutils import face_utils
import os
import pickle
from collections import deque
import threading
import queue
import time

def load_model(model_path='final_model_sequences.keras'):
    """
    Loads the trained model.

    Args:
        model_path (str): Path to the saved model.

    Returns:
        tensorflow.keras.Model: Loaded model.
    """
    model = tf.keras.models.load_model(model_path)
    return model

def get_facial_landmarks(detector, predictor, image):
    """
    Detects facial landmarks in an image.

    Args:
        detector: dlib face detector.
        predictor: dlib shape predictor.
        image (numpy.ndarray): Input image.

    Returns:
        dict: Coordinates of eyes and eyebrows.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 1)

    if len(rects) == 0:
        return None  # No face detected

    # Assuming the first detected face is the target
    rect = rects[0]
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)

    landmarks = {}
    # Define landmarks for left and right eyes and eyebrows
    landmarks['left_eye'] = shape[36:42]       # Left eye landmarks
    landmarks['right_eye'] = shape[42:48]      # Right eye landmarks
    landmarks['left_eyebrow'] = shape[17:22]   # Left eyebrow landmarks
    landmarks['right_eyebrow'] = shape[22:27]  # Right eyebrow landmarks

    return landmarks

def extract_roi(image, landmarks, region='left_eye', padding=5):
    """
    Extracts a region of interest (ROI) from the image based on landmarks.

    Args:
        image (numpy.ndarray): Input image.
        landmarks (dict): Facial landmarks.
        region (str): Region to extract ('left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow').
        padding (int): Padding around the ROI.

    Returns:
        numpy.ndarray: Extracted ROI.
    """
    points = landmarks.get(region)
    if points is None:
        return None

    # Compute the bounding box
    x, y, w, h = cv2.boundingRect(points)
    x = max(x - padding, 0)
    y = max(y - padding, 0)
    w = w + 2 * padding
    h = h + 2 * padding

    roi = image[y:y+h, x:x+w]
    return roi

def preprocess_frame(image, detector, predictor, img_size=(64, 64)):
    """
    Preprocesses a single frame: detects landmarks, extracts ROIs, and prepares the input.

    Args:
        image (numpy.ndarray): Input frame.
        detector: dlib face detector.
        predictor: dlib shape predictor.
        img_size (tuple): Desired image size for ROIs.

    Returns:
        numpy.ndarray: Preprocessed frame as a concatenated ROI image.
    """
    landmarks = get_facial_landmarks(detector, predictor, image)
    if landmarks is None:
        return None  # No face detected

    # Extract ROIs for eyes and eyebrows
    rois = {}
    rois['left_eye'] = extract_roi(image, landmarks, 'left_eye')
    rois['right_eye'] = extract_roi(image, landmarks, 'right_eye')
    rois['left_eyebrow'] = extract_roi(image, landmarks, 'left_eyebrow')
    rois['right_eyebrow'] = extract_roi(image, landmarks, 'right_eyebrow')

    # Process ROIs
    roi_images = []
    for region in ['left_eye', 'right_eye', 'left_eyebrow', 'right_eyebrow']:
        roi = rois.get(region)
        if roi is not None:
            roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
            roi = cv2.resize(roi, img_size)
            roi = roi.astype('float32') / 255.0  # Normalize to [0,1]
            roi = np.expand_dims(roi, axis=-1)  # Add channel dimension
            roi_images.append(roi)

    if len(roi_images) == 0:
        return None  # No ROIs extracted

    # Concatenate ROIs horizontally to form a single image
    combined_roi = np.hstack(roi_images)
    return combined_roi

def movement_to_text(label_map):
    """
    Creates a mapping from class indices to text.

    Args:
        label_map (dict): Mapping from class names to indices.

    Returns:
        dict: Mapping from indices to text descriptions.
    """
    movement_to_text_map = {
        'upward_eyebrow': 'Eyebrow Raised',
        'downward_eyebrow': 'Eyebrow Lowered',
        'left_eye': 'Left Eye Movement',
        'right_eye': 'Right Eye Movement',
        # Add more mappings as needed
    }

    # Create index to text mapping
    index_to_text = {}
    for cls, idx in label_map.items():
        text = movement_to_text_map.get(cls, cls)
        index_to_text[idx] = text
    return index_to_text

def prediction_worker(model, input_queue, output_queue, max_seq_length):
    """
    Worker thread for handling model predictions.

    Args:
        model (tensorflow.keras.Model): Trained model.
        input_queue (queue.Queue): Queue to receive sequences for prediction.
        output_queue (queue.Queue): Queue to send prediction results.
        max_seq_length (int): Fixed sequence length for the model.
    """
    while True:
        sequence = input_queue.get()
        if sequence is None:
            break  # Sentinel to stop the thread

        # Pad or truncate the sequence to match the model's expected input
        if sequence.shape[0] < max_seq_length:
            pad_width = max_seq_length - sequence.shape[0]
            padding = np.zeros((pad_width, *sequence.shape[1:]), dtype=sequence.dtype)
            sequence_padded = np.concatenate((sequence, padding), axis=0)
        else:
            sequence_padded = sequence[:max_seq_length]

        # Expand dimensions to match model input (1, frames, height, width, channels)
        sequence_padded = np.expand_dims(sequence_padded, axis=0)

        # Perform prediction
        prediction = model.predict(sequence_padded)
        class_idx = np.argmax(prediction)
        confidence = np.max(prediction)

        # Put the result in the output queue
        output_queue.put((class_idx, confidence))

def main():
    # Load the trained model
    model = load_model('final_model_sequences.keras')

    # Load label map
    with open('dataset_sequences.pkl', 'rb') as f:
        data = pickle.load(f)
    label_map = data['label_map']
    index_to_text = movement_to_text(label_map)

    # Initialize dlib's face detector and landmark predictor
    detector = dlib.get_frontal_face_detector()
    predictor_path = 'shape_predictor_68_face_landmarks.dat'

    if not os.path.exists(predictor_path):
        print(f"Error: {predictor_path} not found. Download it from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        return

    predictor = dlib.shape_predictor(predictor_path)

    # Initialize queues for communication between threads
    input_queue = queue.Queue()
    output_queue = queue.Queue()

    # Define sequence length (number of frames)
    max_seq_length = 20  # Adjust based on your training data

    # Start the prediction worker thread
    pred_thread = threading.Thread(target=prediction_worker, args=(model, input_queue, output_queue, max_seq_length))
    pred_thread.daemon = True
    pred_thread.start()

    # Start video capture
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    print("Starting real-time prediction. Press 'q' to quit.")

    # Initialize a deque to store the sequence of preprocessed frames
    frame_buffer = deque(maxlen=max_seq_length)

    # Variable to store the latest prediction result
    latest_prediction = "Initializing..."

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        # Preprocess the current frame
        preprocessed_frame = preprocess_frame(frame, detector, predictor, img_size=(64, 64))
        if preprocessed_frame is not None:
            frame_buffer.append(preprocessed_frame)
        else:
            # If no face detected, append a zero array to maintain sequence length
            frame_buffer.append(np.zeros((64, 256, 1), dtype='float32'))

        # If the buffer is full, send the sequence to the prediction thread
        if len(frame_buffer) == max_seq_length:
            # Convert deque to numpy array
            sequence_array = np.array(frame_buffer)
            input_queue.put(sequence_array)

        # Check if there's a new prediction result
        try:
            while True:
                class_idx, confidence = output_queue.get_nowait()
                movement = index_to_text.get(class_idx, "Unknown")
                latest_prediction = f"{movement} ({confidence*100:.2f}%)"
        except queue.Empty:
            pass  # No new prediction

        # Display the prediction on the frame
        cv2.putText(frame, latest_prediction, (30, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (0, 255, 0), 2, cv2.LINE_AA)

        # Display the frame
        cv2.imshow('Real-time Movement Prediction', frame)

        # Exit condition
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Cleanup
    cap.release()
    cv2.destroyAllWindows()

    # Stop the prediction thread
    input_queue.put(None)  # Sentinel to stop the thread
    pred_thread.join()

if __name__ == "__main__":
    main()
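
The hard-coded max_seq_length = 20 has to match the sequence length the model was trained on; one way to avoid a mismatch is to read it back from the saved dataset, e.g.:

import pickle

# Recover the trained sequence length from dataset_sequences.pkl instead of hard-coding it
# (X_train has shape (samples, frames, 64, 256, 1)).
with open('dataset_sequences.pkl', 'rb') as f:
    data = pickle.load(f)
max_seq_length = data['X_train'].shape[1]
print(f"Trained sequence length: {max_seq_length}")
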
shape_predictor_68_face_landmarks.dat
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
size 99693937
video_capture.py
ADDED
@@ -0,0 +1,62 @@
# video_capture.py

import cv2
import os

def record_video(duration=2, output_dir='videos', filename='sample'):
    """
    Records a short video from the webcam.

    Args:
        duration (int): Duration of the video in seconds.
        output_dir (str): Directory to save the videos.
        filename (str): Name of the output video file.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cap = cv2.VideoCapture(0)  # Initialize webcam

    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    # Get default camera resolution
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = 20  # Frames per second

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(os.path.join(output_dir, f"{filename}.avi"), fourcc, fps, (frame_width, frame_height))

    print("Recording started. Press 'q' to stop early.")

    frame_count = 0
    total_frames = duration * fps

    while frame_count < total_frames:
        ret, frame = cap.read()
        if ret:
            out.write(frame)  # Write frame to video file
            cv2.imshow('Recording', frame)
            frame_count += 1

            # Press 'q' to quit early
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            print("Failed to grab frame.")
            break

    # Release resources
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    print(f"Recording finished. Video saved as {filename}.avi")

if __name__ == "__main__":
    # Example: Record a 2-second video named 'movement1'
    label = input("Enter movement label (e.g., 'upward_eyebrow'): ")
    filename = input("Enter filename (e.g., 'movement1'): ")
    # Prefix the filename with the label so frame_extraction.py can derive the class
    # from the 'label_something.avi' naming convention (previously the label was unused).
    record_video(duration=2, filename=f"{label}_{filename}")