mirror of
https://github.com/ck-zhang/EyePy.git
synced 2026-01-06 11:39:30 -06:00
183 lines
6.2 KiB
Python
183 lines
6.2 KiB
Python
import cv2
|
|
import mediapipe as mp
|
|
import numpy as np
|
|
from sklearn.linear_model import Ridge
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
class GazeEstimator:
|
|
def __init__(self, use_separate_models=False):
|
|
self.face_mesh = mp.solutions.face_mesh.FaceMesh(
|
|
static_image_mode=False,
|
|
max_num_faces=1,
|
|
refine_landmarks=True,
|
|
min_detection_confidence=0.5,
|
|
)
|
|
self.use_separate_models = use_separate_models
|
|
self.variable_scaling = None
|
|
|
|
if self.use_separate_models:
|
|
self.scaler_x = StandardScaler()
|
|
self.scaler_y = StandardScaler()
|
|
self.model_x = None
|
|
self.model_y = None
|
|
else:
|
|
self.model = None
|
|
self.scaler = StandardScaler()
|
|
|
|
def extract_features(self, image):
|
|
"""
|
|
Takes in image and returns features
|
|
"""
|
|
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
results = self.face_mesh.process(image_rgb)
|
|
|
|
if not results.multi_face_landmarks:
|
|
return None, None
|
|
|
|
face_landmarks = results.multi_face_landmarks[0]
|
|
landmarks = face_landmarks.landmark
|
|
|
|
left_pupil = np.array([landmarks[468].x, landmarks[468].y])
|
|
right_pupil = np.array([landmarks[473].x, landmarks[473].y])
|
|
|
|
left_eye_inner = np.array([landmarks[133].x, landmarks[133].y])
|
|
left_eye_outer = np.array([landmarks[33].x, landmarks[33].y])
|
|
left_eye_top = np.array([landmarks[159].x, landmarks[159].y])
|
|
left_eye_bottom = np.array([landmarks[145].x, landmarks[145].y])
|
|
|
|
right_eye_inner = np.array([landmarks[362].x, landmarks[362].y])
|
|
right_eye_outer = np.array([landmarks[263].x, landmarks[263].y])
|
|
right_eye_top = np.array([landmarks[386].x, landmarks[386].y])
|
|
right_eye_bottom = np.array([landmarks[374].x, landmarks[374].y])
|
|
|
|
left_pupil_rel = self._calculate_relative_position(
|
|
left_pupil, left_eye_inner, left_eye_outer, left_eye_top, left_eye_bottom
|
|
)
|
|
right_pupil_rel = self._calculate_relative_position(
|
|
right_pupil,
|
|
right_eye_inner,
|
|
right_eye_outer,
|
|
right_eye_top,
|
|
right_eye_bottom,
|
|
)
|
|
|
|
yaw, pitch = self._calculate_head_orientation(landmarks)
|
|
|
|
features = np.hstack([left_pupil_rel, right_pupil_rel, [yaw, pitch]])
|
|
|
|
# Blink detection
|
|
left_eye_width = np.linalg.norm(left_eye_outer - left_eye_inner)
|
|
left_eye_height = np.linalg.norm(left_eye_top - left_eye_bottom)
|
|
left_EAR = left_eye_height / left_eye_width
|
|
|
|
right_eye_width = np.linalg.norm(right_eye_outer - right_eye_inner)
|
|
right_eye_height = np.linalg.norm(right_eye_top - right_eye_bottom)
|
|
right_EAR = right_eye_height / right_eye_width
|
|
|
|
EAR = (left_EAR + right_EAR) / 2
|
|
|
|
blink_threshold = 0.2
|
|
|
|
if EAR < blink_threshold:
|
|
blink_detected = True
|
|
else:
|
|
blink_detected = False
|
|
|
|
return features, blink_detected
|
|
|
|
def _calculate_relative_position(
|
|
self, pupil, inner_corner, outer_corner, top_point, bottom_point
|
|
):
|
|
"""
|
|
Calculates relative pupil position within the eye
|
|
"""
|
|
eye_width = np.linalg.norm(outer_corner - inner_corner)
|
|
horizontal_pos = np.dot(pupil - inner_corner, outer_corner - inner_corner) / (
|
|
eye_width**2
|
|
)
|
|
|
|
eye_height = np.linalg.norm(top_point - bottom_point)
|
|
vertical_pos = np.dot(pupil - bottom_point, top_point - bottom_point) / (
|
|
eye_height**2
|
|
)
|
|
|
|
return np.array([horizontal_pos, vertical_pos])
|
|
|
|
def _calculate_head_orientation(self, landmarks):
|
|
"""
|
|
Calculates head orientation
|
|
"""
|
|
nose_tip = np.array([landmarks[1].x, landmarks[1].y])
|
|
|
|
left_eye_outer = np.array([landmarks[33].x, landmarks[33].y])
|
|
right_eye_outer = np.array([landmarks[263].x, landmarks[263].y])
|
|
eye_center = (left_eye_outer + right_eye_outer) / 2
|
|
|
|
yaw = nose_tip[0] - eye_center[0]
|
|
pitch = nose_tip[1] - eye_center[1]
|
|
|
|
return yaw, pitch
|
|
|
|
def train(self, X, y, alpha=1.0, variable_scaling=None):
|
|
"""
|
|
Trains gaze prediction model
|
|
"""
|
|
self.variable_scaling = variable_scaling
|
|
|
|
if self.use_separate_models:
|
|
X_x = X[:, [0, 2, 4]] # horizontal ratios and yaw
|
|
X_y = X[:, [1, 3, 5]] # vertical ratios and pitch
|
|
|
|
X_x_scaled = self.scaler_x.fit_transform(X_x)
|
|
X_y_scaled = self.scaler_y.fit_transform(X_y)
|
|
|
|
if self.variable_scaling is not None:
|
|
X_x_scaled *= self.variable_scaling
|
|
X_y_scaled *= self.variable_scaling
|
|
|
|
self.model_x = Ridge(alpha=alpha)
|
|
self.model_y = Ridge(alpha=alpha)
|
|
self.model_x.fit(X_x_scaled, y[:, 0])
|
|
self.model_y.fit(X_y_scaled, y[:, 1])
|
|
else:
|
|
X_scaled = self.scaler.fit_transform(X)
|
|
|
|
if self.variable_scaling is not None:
|
|
X_scaled *= self.variable_scaling
|
|
|
|
self.model = Ridge(alpha=alpha)
|
|
self.model.fit(X_scaled, y)
|
|
|
|
def predict(self, X):
|
|
"""
|
|
Predicts gaze location
|
|
"""
|
|
if self.use_separate_models:
|
|
if self.model_x is None or self.model_y is None:
|
|
raise Exception("Models are not trained yet.")
|
|
|
|
X_x = X[:, [0, 2, 4]] # horizontal ratios and yaw
|
|
X_y = X[:, [1, 3, 5]] # vertical ratios and pitch
|
|
|
|
X_x_scaled = self.scaler_x.transform(X_x)
|
|
X_y_scaled = self.scaler_y.transform(X_y)
|
|
|
|
if self.variable_scaling is not None:
|
|
X_x_scaled *= self.variable_scaling
|
|
X_y_scaled *= self.variable_scaling
|
|
|
|
x_pred = self.model_x.predict(X_x_scaled)
|
|
y_pred = self.model_y.predict(X_y_scaled)
|
|
return np.vstack((x_pred, y_pred)).T
|
|
else:
|
|
if self.model is None:
|
|
raise Exception("Model is not trained yet.")
|
|
|
|
X_scaled = self.scaler.transform(X)
|
|
|
|
if self.variable_scaling is not None:
|
|
X_scaled *= self.variable_scaling
|
|
|
|
return self.model.predict(X_scaled)
|