mirror of
https://github.com/ck-zhang/EyePy.git
synced 2026-02-18 10:49:59 -06:00
Refactor to library
This commit is contained in:
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +0,0 @@
|
||||
shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
|
||||
@@ -1 +1 @@
|
||||
3.12
|
||||
>=3.9
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
from .gaze_estimator import GazeEstimator
|
||||
400
calibration.py
400
calibration.py
@@ -1,400 +0,0 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import tkinter as tk
|
||||
import time
|
||||
from gaze_estimator import GazeEstimator
|
||||
|
||||
|
||||
def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur=2):
    """
    Wait until a face is visible (and not blinking) for `dur` consecutive seconds.

    A fullscreen "Calibration" window is created. While a usable face is seen,
    a filled green pie in the screen centre shrinks with a smoothstep easing;
    the timer restarts whenever the face is lost or a blink is reported, and
    "Face not detected" is shown instead.

    cap            -- object whose read() returns (ok, frame), e.g. cv2.VideoCapture
    gaze_estimator -- object whose extract_features(frame) returns (features, blink)
    sw, sh         -- canvas width and height in pixels
    dur            -- seconds the face must stay visible before success

    Returns True when the countdown completes, False when Esc (key 27) is pressed.
    """
    window = "Calibration"
    cv2.namedWindow(window, cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty(window, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    font = cv2.FONT_HERSHEY_SIMPLEX
    message = "Face not detected"
    font_scale, stroke = 2, 3
    centre = (sw // 2, sh // 2)

    # None means "no countdown running"; otherwise the time the face was first seen.
    face_since = None
    while True:
        grabbed, frame = cap.read()
        if not grabbed:
            # Keep servicing GUI events so Esc can still abort when the camera
            # delivers no frames; previously this path looped forever.
            if cv2.waitKey(1) == 27:
                return False
            continue

        features, blinking = gaze_estimator.extract_features(frame)
        canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
        timestamp = time.time()

        if features is None or blinking:
            face_since = None
            (text_w, text_h), _ = cv2.getTextSize(message, font, font_scale, stroke)
            origin = ((sw - text_w) // 2, (sh + text_h) // 2)
            cv2.putText(canvas, message, origin, font, font_scale, (0, 0, 255), stroke)
        else:
            if face_since is None:
                face_since = timestamp
            held = timestamp - face_since
            if held >= dur:
                return True
            # Smoothstep easing of the remaining sweep angle (360 -> 0 degrees).
            progress = held / dur
            eased = progress * progress * (3 - 2 * progress)
            sweep = 360 * (1 - eased)
            cv2.ellipse(canvas, centre, (50, 50), 0, -90, -90 + sweep, (0, 255, 0), -1)

        cv2.imshow(window, canvas)
        if cv2.waitKey(1) == 27:
            return False
|
||||
|
||||
|
||||
def run_9_point_calibration(gaze_estimator, camera_index=0):
    """
    Nine-target calibration on a 3x3 grid inset 10% from every screen edge.

    After the face-detection countdown, each target pulses for one second
    (no samples are taken) and then holds still for one second behind a
    shrinking white ring. Every non-blink frame seen during the hold adds one
    (features, [x, y]) pair. Pressing Esc at any point releases the camera,
    closes all windows and returns without training. Otherwise the estimator
    is trained on the collected pairs, provided there is at least one.
    """
    probe = tk.Tk()
    sw = probe.winfo_screenwidth()
    sh = probe.winfo_screenheight()
    probe.destroy()

    cap = cv2.VideoCapture(camera_index)

    def shutdown():
        # Teardown shared by the abort paths and the normal exit.
        cap.release()
        cv2.destroyAllWindows()

    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
        shutdown()
        return

    inset_x = int(sw * 0.1)
    inset_y = int(sh * 0.1)
    half_w = (sw - 2 * inset_x) / 2
    half_h = (sh - 2 * inset_y) / 2
    # (row, col) cells: centre first, then the four corners, then the edge midpoints.
    cells = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0), (0, 1), (2, 1), (1, 2)]
    targets = [
        (inset_x + int(col * half_w), inset_y + int(row * half_h))
        for row, col in cells
    ]

    samples, labels = [], []
    pulse_seconds = hold_seconds = 1.0
    green, white = (0, 255, 0), (255, 255, 255)

    for tx, ty in targets:
        # Phase 1: pulsing dot to attract the eye; frames are read but not sampled.
        dot_radius = 20
        phase_start = time.time()
        while True:
            elapsed = time.time() - phase_start
            if elapsed > pulse_seconds:
                break
            grabbed, frame = cap.read()
            if not grabbed:
                continue
            canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
            dot_radius = 15 + int(15 * abs(np.sin(2 * np.pi * elapsed)))
            cv2.circle(canvas, (tx, ty), dot_radius, green, -1)
            cv2.imshow("Calibration", canvas)
            if cv2.waitKey(1) == 27:
                shutdown()
                return

        # Phase 2: dot frozen at its last pulse radius while samples are collected.
        phase_start = time.time()
        while True:
            elapsed = time.time() - phase_start
            if elapsed > hold_seconds:
                break
            grabbed, frame = cap.read()
            if not grabbed:
                continue
            canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
            cv2.circle(canvas, (tx, ty), dot_radius, green, -1)
            progress = elapsed / hold_seconds
            eased = progress * progress * (3 - 2 * progress)
            sweep = 360 * (1 - eased)
            cv2.ellipse(canvas, (tx, ty), (40, 40), 0, -90, -90 + sweep, white, 4)
            cv2.imshow("Calibration", canvas)
            if cv2.waitKey(1) == 27:
                shutdown()
                return
            features, blinking = gaze_estimator.extract_features(frame)
            if features is None or blinking:
                continue
            samples.append(features)
            labels.append([tx, ty])

    shutdown()
    if samples:
        gaze_estimator.train(np.array(samples), np.array(labels))
|
||||
|
||||
|
||||
def run_5_point_calibration(gaze_estimator, camera_index=0):
    """
    Five-target calibration: screen centre plus four points 100 px in from the corners.

    The flow per target matches the 9-point routine: a one-second pulsing dot
    without sampling, then a one-second hold with a shrinking white ring during
    which every non-blink frame yields a (features, [x, y]) pair. Esc releases
    the camera, closes all windows and returns without training. Otherwise the
    estimator is trained when at least one pair was collected.
    """
    root = tk.Tk()
    sw = root.winfo_screenwidth()
    sh = root.winfo_screenheight()
    root.destroy()

    capture = cv2.VideoCapture(camera_index)

    def abort():
        # Release the camera and close every OpenCV window.
        capture.release()
        cv2.destroyAllWindows()

    def esc_after_show(image):
        # Present the frame and report whether Esc (27) was pressed.
        cv2.imshow("Calibration", image)
        return cv2.waitKey(1) == 27

    if not wait_for_face_and_countdown(capture, gaze_estimator, sw, sh, 2):
        abort()
        return

    margin = 100
    xs_by_col = (margin, sw // 2, sw - margin)
    ys_by_row = (margin, sh // 2, sh - margin)
    # (row, col) pairs: centre, top-left, bottom-left, top-right, bottom-right.
    visit_order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2)]
    targets = [(xs_by_col[col], ys_by_row[row]) for row, col in visit_order]

    collected_features = []
    collected_targets = []
    pulse_len, hold_len = 1.0, 1.0

    for target in targets:
        radius_now = 20

        # Pulse phase: animate the dot, no sampling.
        t0 = time.time()
        while True:
            dt = time.time() - t0
            if dt > pulse_len:
                break
            ok, frame = capture.read()
            if ok:
                screen = np.zeros((sh, sw, 3), dtype=np.uint8)
                radius_now = 15 + int(15 * abs(np.sin(2 * np.pi * dt)))
                cv2.circle(screen, target, radius_now, (0, 255, 0), -1)
                if esc_after_show(screen):
                    abort()
                    return

        # Hold phase: dot keeps its last radius; sample every usable frame.
        t0 = time.time()
        while True:
            dt = time.time() - t0
            if dt > hold_len:
                break
            ok, frame = capture.read()
            if ok:
                screen = np.zeros((sh, sw, 3), dtype=np.uint8)
                cv2.circle(screen, target, radius_now, (0, 255, 0), -1)
                u = dt / hold_len
                remaining_angle = 360 * (1 - u * u * (3 - 2 * u))
                cv2.ellipse(
                    screen,
                    target,
                    (40, 40),
                    0,
                    -90,
                    -90 + remaining_angle,
                    (255, 255, 255),
                    4,
                )
                if esc_after_show(screen):
                    abort()
                    return
                feature_vec, is_blink = gaze_estimator.extract_features(frame)
                if feature_vec is not None and not is_blink:
                    collected_features.append(feature_vec)
                    collected_targets.append([target[0], target[1]])

    abort()
    if collected_features:
        gaze_estimator.train(
            np.array(collected_features), np.array(collected_targets)
        )
|
||||
|
||||
|
||||
def run_lissajous_calibration(gaze_estimator, camera_index=0):
    """
    Calibrate while a dot traces a 3:2 Lissajous figure across the screen.

    The curve spans 40% of the screen size on either side of the centre. The
    curve parameter advances over 300 steps (5.0 * 60) with a speed profile of
    0.3 + 0.7 * sin(pi * frac), normalised so the last step lands on 2*pi.
    One camera frame is read per step; each non-blink frame is paired with the
    dot's floating-point position. Esc stops the sweep early, but samples
    gathered so far are still used for training.
    """
    probe = tk.Tk()
    sw = probe.winfo_screenwidth()
    sh = probe.winfo_screenheight()
    probe.destroy()

    cap = cv2.VideoCapture(camera_index)
    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
        cap.release()
        cv2.destroyAllWindows()
        return

    amp_x, amp_y = sw * 0.4, sh * 0.4
    freq_x, freq_y, phase = 3, 2, 0
    nominal_fps = 60
    n_steps = int(5.0 * nominal_fps)

    # Per-step advance of the (unnormalised) curve parameter: slow at both
    # ends of the sweep, fastest in the middle.
    increments = []
    for i in range(n_steps):
        frac = i / (n_steps - 1)
        speed = 0.3 + 0.7 * np.sin(np.pi * frac)
        increments.append(speed / nominal_fps)

    # Plain running sum so the total matches the per-step accumulation below.
    total = 0
    for inc in increments:
        total += inc
    total = max(total, 1e-6)

    samples, labels = [], []
    progress = 0
    for inc in increments:
        progress += inc
        theta = (progress / total) * (2 * np.pi)
        grabbed, frame = cap.read()
        if not grabbed:
            continue
        dot_x = amp_x * np.sin(freq_x * theta + phase) + sw / 2
        dot_y = amp_y * np.sin(freq_y * theta) + sh / 2
        canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
        cv2.circle(canvas, (int(dot_x), int(dot_y)), 20, (0, 255, 0), -1)
        cv2.imshow("Calibration", canvas)
        if cv2.waitKey(1) == 27:
            break
        features, blinking = gaze_estimator.extract_features(frame)
        if features is None or blinking:
            continue
        samples.append(features)
        labels.append([dot_x, dot_y])

    cap.release()
    cv2.destroyAllWindows()
    if samples:
        gaze_estimator.train(np.array(samples), np.array(labels))
|
||||
|
||||
|
||||
def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
    """
    Quick fine-tuning pass that sets the Kalman filter's measurementNoiseCov.

    Three green targets are shown in a fullscreen "Fine Tuning" window. When
    the predicted gaze stays within screen_width / 5 pixels of a target for
    0.5 s, the following 0.5 s of predictions are recorded while the target
    shakes with growing amplitude; the target is then removed. Looking away,
    blinking or losing the face restarts that target. After all targets are
    done, the per-axis variance of the recorded predictions becomes the
    diagonal of kalman.measurementNoiseCov (zero variances are replaced by
    1e-4). Fewer than two recorded points leave the filter untouched.

    Esc aborts without modifying the filter. The camera and the window are
    always released, including when an exception propagates.

    gaze_estimator -- object providing extract_features(frame) and predict(X)
    kalman         -- filter object whose measurementNoiseCov attribute is set
    camera_index   -- index passed to cv2.VideoCapture
    """
    root = tk.Tk()
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
    root.destroy()

    anchors = [
        (screen_width // 2, screen_height // 4),
        (screen_width // 4, 3 * screen_height // 4),
        (3 * screen_width // 4, 3 * screen_height // 4),
    ]
    points = [
        {
            "position": anchor,
            "start_time": None,
            "data_collection_started": False,
            "collection_start_time": None,
            "collected_gaze": [],
        }
        for anchor in anchors
    ]

    def reset(point):
        # Forget any dwell/collection progress for this target.
        point["start_time"] = None
        point["data_collection_started"] = False
        point["collection_start_time"] = None
        point["collected_gaze"] = []

    proximity_threshold = screen_width / 5
    initial_delay = 0.5
    data_collection_duration = 0.5

    window = "Fine Tuning"
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.5
    color = (255, 255, 255)
    thickness = 2
    text = "Look at the points until they disappear"

    cv2.namedWindow(window, cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty(window, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    cap = cv2.VideoCapture(camera_index)
    gaze_positions = []

    try:
        while points:
            ret, frame = cap.read()
            if not ret:
                # Still service key events so Esc works when the camera
                # delivers no frames; previously this path looped forever.
                if cv2.waitKey(1) == 27:
                    return
                continue

            features, blink_detected = gaze_estimator.extract_features(frame)
            canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)

            for point in points:
                cv2.circle(canvas, point["position"], 20, (0, 255, 0), -1)

            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
            text_x = (screen_width - text_size[0]) // 2
            text_y = screen_height - 50
            cv2.putText(
                canvas, text, (text_x, text_y), font, font_scale, color, thickness
            )

            current_time = time.time()

            if features is not None and not blink_detected:
                gaze_point = gaze_estimator.predict(np.array([features]))[0]
                gaze_x, gaze_y = int(gaze_point[0]), int(gaze_point[1])
                cv2.circle(canvas, (gaze_x, gaze_y), 10, (255, 0, 0), -1)

                # Iterate over a copy because finished targets are removed in-loop.
                for point in points[:]:
                    dx = gaze_x - point["position"][0]
                    dy = gaze_y - point["position"][1]
                    distance = np.sqrt(dx * dx + dy * dy)
                    if distance > proximity_threshold:
                        reset(point)
                        continue

                    if point["start_time"] is None:
                        reset(point)
                        point["start_time"] = current_time
                    elapsed_time = current_time - point["start_time"]

                    if (
                        not point["data_collection_started"]
                        and elapsed_time >= initial_delay
                    ):
                        point["data_collection_started"] = True
                        point["collection_start_time"] = current_time
                        point["collected_gaze"] = []

                    if not point["data_collection_started"]:
                        # Dwell acknowledged but still inside the initial delay.
                        cv2.circle(canvas, point["position"], 25, (0, 255, 255), 2)
                        continue

                    data_collection_elapsed = (
                        current_time - point["collection_start_time"]
                    )
                    point["collected_gaze"].append([gaze_x, gaze_y])
                    # Shake grows from 5 px to 25 px over the collection window.
                    shake_amplitude = int(
                        5 + (data_collection_elapsed / data_collection_duration) * 20
                    )
                    shake_x = int(np.random.uniform(-shake_amplitude, shake_amplitude))
                    shake_y = int(np.random.uniform(-shake_amplitude, shake_amplitude))
                    shaken_position = (
                        point["position"][0] + shake_x,
                        point["position"][1] + shake_y,
                    )
                    cv2.circle(canvas, shaken_position, 20, (0, 255, 0), -1)

                    if data_collection_elapsed >= data_collection_duration:
                        gaze_positions.extend(point["collected_gaze"])
                        points.remove(point)
            else:
                for point in points:
                    reset(point)

            cv2.imshow(window, canvas)
            if cv2.waitKey(1) == 27:
                return
    finally:
        cap.release()
        cv2.destroyWindow(window)

    gaze_positions = np.array(gaze_positions)
    if gaze_positions.shape[0] < 2:
        return

    gaze_variance = np.var(gaze_positions, axis=0)
    # A zero variance would make the covariance singular; substitute a small floor.
    gaze_variance[gaze_variance == 0] = 1e-4
    kalman.measurementNoiseCov = np.array(
        [[gaze_variance[0], 0], [0, gaze_variance[1]]], dtype=np.float32
    )
|
||||
272
demo.py
272
demo.py
@@ -1,272 +0,0 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import tkinter as tk
|
||||
import time
|
||||
import argparse
|
||||
import os
|
||||
from gaze_estimator import GazeEstimator
|
||||
from calibration import (
|
||||
run_9_point_calibration,
|
||||
run_5_point_calibration,
|
||||
run_lissajous_calibration,
|
||||
fine_tune_kalman_filter,
|
||||
)
|
||||
from scipy.stats import gaussian_kde
|
||||
|
||||
|
||||
def main():
    """
    Command-line gaze demo: calibrate, optionally tune a Kalman filter, then track.

    Options: --filter {kalman,kde,none}, --camera INDEX,
    --calibration {9p,5p,lissajous}, --background PATH, --confidence FLOAT.

    After calibration a fullscreen "Gaze Estimation" window shows the gaze
    cursor (fading in while a face is tracked and out otherwise), a camera
    thumbnail in the bottom-right corner, an FPS counter and a blink
    indicator. With --filter kde, a contour enclosing the requested share of
    the kernel-density mass of the last 0.5 s of predictions is drawn as well.
    Esc exits. The camera and windows are released even if an error occurs.
    """
    parser = argparse.ArgumentParser(
        description="Gaze Estimation with Kalman Filter or KDE"
    )
    parser.add_argument(
        "--filter",
        choices=["kalman", "kde", "none"],
        default="none",
        help="Filter method: kalman, kde, or none",
    )
    parser.add_argument("--camera", type=int, default=0, help="Camera index")
    parser.add_argument(
        "--calibration",
        choices=["9p", "5p", "lissajous"],
        default="9p",
        help="Choose calibration method (9p, 5p, or lissajous).",
    )
    parser.add_argument(
        "--background", type=str, default=None, help="Path to background image"
    )
    parser.add_argument(
        "--confidence",
        type=float,
        default=0.5,
        help="Confidence interval for KDE contour (0 < value < 1)",
    )
    args = parser.parse_args()

    filter_method = args.filter
    camera_index = args.camera
    calibration_method = args.calibration
    background_path = args.background
    confidence_level = args.confidence

    gaze_estimator = GazeEstimator()

    # Run the chosen calibration method (default 9p)
    if calibration_method == "9p":
        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
    elif calibration_method == "5p":
        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
    else:
        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)

    kalman = None
    if filter_method == "kalman":
        # Constant-velocity model: state (x, y, vx, vy), measurement (x, y).
        kalman = cv2.KalmanFilter(4, 2)
        kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
        kalman.transitionMatrix = np.array(
            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]],
            np.float32,
        )
        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1
        kalman.statePre = np.zeros((4, 1), np.float32)
        kalman.statePost = np.zeros((4, 1), np.float32)

        fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=camera_index)

    root = tk.Tk()
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
    root.destroy()

    cam_width, cam_height = 320, 240

    background = None
    if background_path and os.path.isfile(background_path):
        background = cv2.imread(background_path)
    if background is None:
        # No path given, or the file could not be decoded as an image:
        # fall back to a uniform dark-grey canvas instead of crashing in resize.
        background = np.full((screen_height, screen_width, 3), 50, dtype=np.uint8)
    else:
        background = cv2.resize(background, (screen_width, screen_height))

    cv2.namedWindow("Gaze Estimation", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty(
        "Gaze Estimation", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN
    )

    cap = cv2.VideoCapture(camera_index)
    prev_time = time.time()

    gaze_history = []  # (timestamp, x, y); only used by the KDE filter
    time_window = 0.5  # seconds

    # Variables for gaze cursor fade effect
    cursor_alpha = 0.0
    cursor_alpha_step = 0.05

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.2
    font_color = (255, 255, 255)
    font_thickness = 2

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Keep Esc responsive when the camera yields no frames.
                if cv2.waitKey(1) == 27:
                    break
                continue

            contours = []
            features, blink_detected = gaze_estimator.extract_features(frame)
            if features is not None and not blink_detected:
                gaze_point = gaze_estimator.predict(np.array([features]))[0]
                x, y = int(gaze_point[0]), int(gaze_point[1])

                if filter_method == "kalman":
                    prediction = kalman.predict()
                    # Clamp the predicted gaze point to the screen boundaries
                    x_pred = max(0, min(int(prediction[0][0]), screen_width - 1))
                    y_pred = max(0, min(int(prediction[1][0]), screen_height - 1))

                    measurement = np.array([[np.float32(x)], [np.float32(y)]])
                    if np.count_nonzero(kalman.statePre) == 0:
                        # Seed the filter with the first measurement.
                        kalman.statePre[:2] = measurement
                        kalman.statePost[:2] = measurement
                    kalman.correct(measurement)

                elif filter_method == "kde":
                    current_time = time.time()
                    gaze_history.append((current_time, x, y))
                    # Remove old entries
                    gaze_history = [
                        entry
                        for entry in gaze_history
                        if current_time - entry[0] <= time_window
                    ]

                    if len(gaze_history) > 1:
                        gaze_array = np.array(
                            [(gx, gy) for (_, gx, gy) in gaze_history]
                        )
                        x_pred = int(np.mean(gaze_array[:, 0]))
                        y_pred = int(np.mean(gaze_array[:, 1]))
                        try:
                            kde = gaussian_kde(gaze_array.T)

                            # Compute densities on a grid for visualization
                            xi, yi = np.mgrid[
                                0:screen_width:320j, 0:screen_height:200j
                            ]
                            coords = np.vstack([xi.ravel(), yi.ravel()])
                            zi = kde(coords).reshape(xi.shape).T

                            # Sort densities high-to-low and accumulate them
                            # into a CDF to find the threshold that encloses
                            # the requested share of the mass.
                            zi_sorted = np.sort(zi.ravel())[::-1]
                            cumsum = np.cumsum(zi_sorted)
                            cumsum /= cumsum[-1]
                            idx = min(
                                int(np.searchsorted(cumsum, confidence_level)),
                                len(zi_sorted) - 1,
                            )
                            threshold = zi_sorted[idx]

                            mask = (zi >= threshold).astype(np.uint8)
                            mask_resized = cv2.resize(
                                mask, (screen_width, screen_height)
                            )
                            contours, _ = cv2.findContours(
                                mask_resized,
                                cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE,
                            )
                        except np.linalg.LinAlgError:
                            # Singular covariance (e.g. identical samples):
                            # keep the mean as the estimate, draw no contour.
                            contours = []
                    else:
                        x_pred, y_pred = x, y

                else:
                    x_pred, y_pred = x, y

                # Increase cursor alpha for fade-in effect
                cursor_alpha = min(cursor_alpha + cursor_alpha_step, 1.0)
            else:
                x_pred, y_pred = None, None
                blink_detected = True
                # Decrease cursor alpha for fade-out effect
                cursor_alpha = max(cursor_alpha - cursor_alpha_step, 0.0)

            canvas = background.copy()

            if filter_method == "kde" and len(contours) > 0:
                cv2.drawContours(canvas, contours, -1, (15, 182, 242), thickness=5)

            # Draw the gaze cursor with fade effect
            if x_pred is not None and y_pred is not None and cursor_alpha > 0:
                overlay = canvas.copy()
                cv2.circle(overlay, (x_pred, y_pred), 30, (0, 0, 255), -1)
                cv2.circle(overlay, (x_pred, y_pred), 25, (255, 255, 255), -1)
                cv2.addWeighted(
                    overlay,
                    cursor_alpha * 0.6,
                    canvas,
                    1 - cursor_alpha * 0.6,
                    0,
                    canvas,
                )

            # Draw the camera feed (2 px white border on every side)
            small_frame = cv2.resize(frame, (cam_width, cam_height))
            frame_border = cv2.copyMakeBorder(
                small_frame, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=(255, 255, 255)
            )
            x_offset = screen_width - cam_width - 20
            y_offset = screen_height - cam_height - 20
            canvas[
                y_offset : y_offset + cam_height + 4,
                x_offset : x_offset + cam_width + 4,
            ] = frame_border

            # FPS and blink indicator
            current_time = time.time()
            frame_interval = current_time - prev_time
            # Two reads of a coarse clock can coincide; avoid dividing by zero.
            fps = 1 / frame_interval if frame_interval > 0 else 0
            prev_time = current_time

            cv2.putText(
                canvas,
                f"FPS: {int(fps)}",
                (50, 50),
                font,
                font_scale,
                font_color,
                font_thickness,
                lineType=cv2.LINE_AA,
            )

            blink_text = "Blinking" if blink_detected else "Not Blinking"
            blink_color = (0, 0, 255) if blink_detected else (0, 255, 0)
            cv2.putText(
                canvas,
                blink_text,
                (50, 100),
                font,
                font_scale,
                blink_color,
                font_thickness,
                lineType=cv2.LINE_AA,
            )

            cv2.imshow("Gaze Estimation", canvas)
            if cv2.waitKey(1) == 27:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
|
||||
@@ -1,14 +1,44 @@
|
||||
[build-system]
|
||||
requires = ["hatchling>=1.25"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "eyepy"
|
||||
version = "0.1.0"
|
||||
description = "EyePy is an eye tracking library easily implementable in your projects"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"mediapipe>=0.10.21",
|
||||
"numpy>=1.26.4",
|
||||
"opencv-python>=4.11.0.86",
|
||||
"pyvirtualcam>=0.12.1",
|
||||
"scikit-learn>=1.6.1",
|
||||
"tk>=0.1.0",
|
||||
name = "eyetrax"
|
||||
description = "Webcam-based eye-tracking"
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
authors = [{ name = "Chenkai Zhang (ck-zhang)" }]
|
||||
requires-python = ">=3.9"
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"opencv-python>=4.5",
|
||||
"mediapipe>=0.10",
|
||||
"numpy>=1.22",
|
||||
"scikit-learn>=1.3",
|
||||
"scipy>=1.10",
|
||||
"screeninfo>=0.8",
|
||||
"pyvirtualcam>=0.10",
|
||||
]
|
||||
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
homepage = "https://github.com/ck-zhang/eyetrax"
|
||||
|
||||
[project.scripts]
|
||||
eyetrax-demo = "eyetrax.app.demo:run_demo"
|
||||
eyetrax-virtualcam = "eyetrax.app.virtualcam:run_virtualcam"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/eyetrax"]
|
||||
|
||||
[tool.hatch.build]
|
||||
include = ["LICENSE"]
|
||||
|
||||
[tool.hatch.version]
|
||||
path = "src/eyetrax/_version.py"
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv export --no-hashes --format requirements-txt
|
||||
absl-py==2.1.0
|
||||
attrs==25.1.0
|
||||
cffi==1.17.1
|
||||
contourpy==1.3.1
|
||||
cycler==0.12.1
|
||||
flatbuffers==25.2.10
|
||||
fonttools==4.56.0
|
||||
jax==0.5.1
|
||||
jaxlib==0.5.1
|
||||
joblib==1.4.2
|
||||
kiwisolver==1.4.8
|
||||
matplotlib==3.10.0
|
||||
mediapipe==0.10.21
|
||||
ml-dtypes==0.4.1 ; python_full_version >= '3.13' or sys_platform != 'darwin'
|
||||
ml-dtypes==0.5.1 ; python_full_version < '3.13' and sys_platform == 'darwin'
|
||||
numpy==1.26.4
|
||||
opencv-contrib-python==4.11.0.86
|
||||
opencv-python==4.11.0.86
|
||||
opt-einsum==3.4.0
|
||||
packaging==24.2
|
||||
pillow==11.1.0
|
||||
protobuf==4.25.6
|
||||
pycparser==2.22
|
||||
pyparsing==3.2.1
|
||||
python-dateutil==2.9.0.post0
|
||||
pyvirtualcam==0.12.1
|
||||
scikit-learn==1.6.1
|
||||
scipy==1.15.2
|
||||
sentencepiece==0.2.0
|
||||
six==1.17.0
|
||||
sounddevice==0.5.1
|
||||
threadpoolctl==3.5.0
|
||||
tk==0.1.0
|
||||
18
src/eyetrax/__init__.py
Normal file
18
src/eyetrax/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from ._version import __version__
|
||||
from .gaze import GazeEstimator
|
||||
|
||||
from .calibration import (
|
||||
run_9_point_calibration,
|
||||
run_5_point_calibration,
|
||||
run_lissajous_calibration,
|
||||
fine_tune_kalman_filter,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"__version__",
|
||||
"GazeEstimator",
|
||||
"run_9_point_calibration",
|
||||
"run_5_point_calibration",
|
||||
"run_lissajous_calibration",
|
||||
"fine_tune_kalman_filter",
|
||||
]
|
||||
2
src/eyetrax/_version.py
Normal file
2
src/eyetrax/_version.py
Normal file
@@ -0,0 +1,2 @@
|
||||
__all__ = ["__version__"]
|
||||
__version__ = "0.2.0"
|
||||
0
src/eyetrax/app/__init__.py
Normal file
0
src/eyetrax/app/__init__.py
Normal file
213
src/eyetrax/app/demo.py
Normal file
213
src/eyetrax/app/demo.py
Normal file
@@ -0,0 +1,213 @@
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
import argparse
|
||||
import os
|
||||
from scipy.stats import gaussian_kde
|
||||
|
||||
from eyetrax.utils.screen import get_screen_size
|
||||
from eyetrax.gaze import GazeEstimator
|
||||
from eyetrax.calibration import (
|
||||
run_9_point_calibration,
|
||||
run_5_point_calibration,
|
||||
run_lissajous_calibration,
|
||||
fine_tune_kalman_filter,
|
||||
)
|
||||
|
||||
|
||||
def _kde_confidence_contours(samples, screen_width, screen_height, confidence_level):
    """
    Return contours enclosing `confidence_level` of the KDE mass of `samples`.

    samples is an (n, 2) array of gaze points in pixels. The density is
    evaluated on a 320x200 grid over the screen, thresholded at the level that
    captures the requested share of the summed density, and the resulting mask
    is scaled to screen size before contour extraction.

    Raises numpy.linalg.LinAlgError when gaussian_kde rejects the samples;
    the caller handles that case.
    """
    kde = gaussian_kde(samples.T)
    xi, yi = np.mgrid[0:screen_width:320j, 0:screen_height:200j]
    zi = kde(np.vstack([xi.ravel(), yi.ravel()])).reshape(xi.shape).T

    flat = zi.ravel()
    order = np.argsort(flat)[::-1]
    cdf = np.cumsum(flat[order])
    if cdf[-1] <= 0:
        # No density anywhere on the grid: nothing sensible to outline.
        return []
    cdf /= cdf[-1]
    # searchsorted returns len(cdf) when confidence_level exceeds every entry
    # (e.g. a value >= 1); clamp so the lookup cannot raise IndexError.
    pos = min(int(np.searchsorted(cdf, confidence_level)), flat.size - 1)
    threshold = flat[order[pos]]

    mask = (zi >= threshold).astype(np.uint8)
    mask = cv2.resize(mask, (screen_width, screen_height))
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours


def run_demo():
    """
    Console entry point for the gaze demo.

    Parses --filter {kalman,kde,none}, --camera, --calibration
    {9p,5p,lissajous}, --background and --confidence, runs the selected
    calibration (plus Kalman fine-tuning when requested), then shows a
    fullscreen "Gaze Estimation" window with the gaze cursor, a camera
    thumbnail in the bottom-right corner, an FPS counter and a blink
    indicator. With --filter kde, a contour around the recent gaze
    distribution is drawn as well. Esc exits. The camera and windows are
    released even if an error occurs.
    """
    parser = argparse.ArgumentParser(
        description="Gaze Estimation with Kalman Filter or KDE"
    )
    parser.add_argument("--filter", choices=["kalman", "kde", "none"], default="none")
    parser.add_argument("--camera", type=int, default=0)
    parser.add_argument(
        "--calibration", choices=["9p", "5p", "lissajous"], default="9p"
    )
    parser.add_argument("--background", type=str, default=None)
    parser.add_argument("--confidence", type=float, default=0.5, help="0 < value < 1")
    args = parser.parse_args()

    filter_method = args.filter
    camera_index = args.camera
    calibration_method = args.calibration
    background_path = args.background
    confidence_level = args.confidence

    gaze_estimator = GazeEstimator()

    if calibration_method == "9p":
        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
    elif calibration_method == "5p":
        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
    else:
        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)

    if filter_method == "kalman":
        # Constant-velocity model: state (x, y, vx, vy), measurement (x, y).
        kalman = cv2.KalmanFilter(4, 2)
        kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
        kalman.transitionMatrix = np.array(
            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
        )
        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 50
        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 0.2
        kalman.statePre = np.zeros((4, 1), np.float32)
        kalman.statePost = np.zeros((4, 1), np.float32)
        fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=camera_index)
    else:
        kalman = None

    screen_width, screen_height = get_screen_size()
    cam_width, cam_height = 320, 240
    BORDER = 2
    MARGIN = 20

    background = None
    if background_path and os.path.isfile(background_path):
        background = cv2.imread(background_path)
    if background is None:
        # No path given, or the file could not be decoded as an image:
        # fall back to a uniform dark-grey canvas instead of crashing in resize.
        background = np.full((screen_height, screen_width, 3), 50, dtype=np.uint8)
    else:
        background = cv2.resize(background, (screen_width, screen_height))

    cv2.namedWindow("Gaze Estimation", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty(
        "Gaze Estimation", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN
    )

    cap = cv2.VideoCapture(camera_index)
    prev_time = time.time()

    gaze_history = []  # (timestamp, x, y); only used by the KDE filter
    time_window = 0.5

    cursor_alpha = 0.0
    cursor_step = 0.05

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Keep Esc responsive when the camera yields no frames.
                if cv2.waitKey(1) == 27:
                    break
                continue

            contours = []
            features, blink_detected = gaze_estimator.extract_features(frame)
            if features is not None and not blink_detected:
                gaze_point = gaze_estimator.predict(np.array([features]))[0]
                x, y = map(int, gaze_point)

                if kalman is not None:
                    prediction = kalman.predict()
                    x_pred, y_pred = map(int, prediction[:2, 0])
                    x_pred = max(0, min(x_pred, screen_width - 1))
                    y_pred = max(0, min(y_pred, screen_height - 1))
                    measurement = np.array([[np.float32(x)], [np.float32(y)]])
                    if not np.any(kalman.statePre):
                        # Seed the filter with the first measurement.
                        kalman.statePre[:2] = measurement
                        kalman.statePost[:2] = measurement
                    kalman.correct(measurement)
                elif filter_method == "kde":
                    now = time.time()
                    gaze_history.append((now, x, y))
                    gaze_history = [
                        (t, gx, gy)
                        for (t, gx, gy) in gaze_history
                        if now - t <= time_window
                    ]
                    x_pred, y_pred = x, y
                    if len(gaze_history) > 1:
                        arr = np.array([(gx, gy) for (_, gx, gy) in gaze_history])
                        try:
                            contours = _kde_confidence_contours(
                                arr, screen_width, screen_height, confidence_level
                            )
                            x_pred = int(np.mean(arr[:, 0]))
                            y_pred = int(np.mean(arr[:, 1]))
                        except np.linalg.LinAlgError:
                            # Degenerate samples: fall back to the raw prediction.
                            contours = []
                else:
                    x_pred, y_pred = x, y

                cursor_alpha = min(cursor_alpha + cursor_step, 1.0)
            else:
                x_pred = y_pred = None
                blink_detected = True
                cursor_alpha = max(cursor_alpha - cursor_step, 0.0)

            canvas = background.copy()

            if filter_method == "kde" and len(contours) > 0:
                cv2.drawContours(canvas, contours, -1, (15, 182, 242), 5)

            if x_pred is not None and y_pred is not None and cursor_alpha > 0:
                overlay = canvas.copy()
                cv2.circle(overlay, (x_pred, y_pred), 30, (0, 0, 255), -1)
                cv2.circle(overlay, (x_pred, y_pred), 25, (255, 255, 255), -1)
                cv2.addWeighted(
                    overlay,
                    cursor_alpha * 0.6,
                    canvas,
                    1 - cursor_alpha * 0.6,
                    0,
                    canvas,
                )

            # Camera thumbnail with a white border, anchored MARGIN px from
            # the bottom-right corner.
            small = cv2.resize(frame, (cam_width, cam_height))
            thumb = cv2.copyMakeBorder(
                small,
                BORDER,
                BORDER,
                BORDER,
                BORDER,
                cv2.BORDER_CONSTANT,
                value=(255, 255, 255),
            )
            h, w = thumb.shape[:2]
            canvas[-h - MARGIN : -MARGIN, -w - MARGIN : -MARGIN] = thumb

            now = time.time()
            frame_interval = now - prev_time
            # Two reads of a coarse clock can coincide; avoid dividing by zero.
            fps = 1 / frame_interval if frame_interval > 0 else 0
            prev_time = now

            cv2.putText(
                canvas,
                f"FPS: {int(fps)}",
                (50, 50),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.2,
                (255, 255, 255),
                2,
                cv2.LINE_AA,
            )
            blink_txt = "Blinking" if blink_detected else "Not Blinking"
            blink_clr = (0, 0, 255) if blink_detected else (0, 255, 0)
            cv2.putText(
                canvas,
                blink_txt,
                (50, 100),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.2,
                blink_clr,
                2,
                cv2.LINE_AA,
            )

            cv2.imshow("Gaze Estimation", canvas)
            if cv2.waitKey(1) == 27:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    run_demo()
|
||||
96
virtual_cam.py → src/eyetrax/app/virtualcam.py
Executable file → Normal file
96
virtual_cam.py → src/eyetrax/app/virtualcam.py
Executable file → Normal file
@@ -2,11 +2,12 @@ import argparse
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
import tkinter as tk
|
||||
import pyvirtualcam
|
||||
from scipy.stats import gaussian_kde
|
||||
from gaze_estimator import GazeEstimator
|
||||
from calibration import (
|
||||
|
||||
from eyetrax.utils.screen import get_screen_size
|
||||
from eyetrax.gaze import GazeEstimator
|
||||
from eyetrax.calibration import (
|
||||
run_9_point_calibration,
|
||||
run_5_point_calibration,
|
||||
run_lissajous_calibration,
|
||||
@@ -14,10 +15,8 @@ from calibration import (
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Virtual Camera Gaze Overlay (v4l2loopback)"
|
||||
)
|
||||
def run_virtualcam():
|
||||
parser = argparse.ArgumentParser(description="Virtual Camera Gaze Overlay")
|
||||
parser.add_argument("--filter", choices=["kalman", "kde", "none"], default="none")
|
||||
parser.add_argument("--camera", type=int, default=0)
|
||||
parser.add_argument(
|
||||
@@ -46,16 +45,13 @@ def main():
|
||||
kalman.transitionMatrix = np.array(
|
||||
[[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
|
||||
)
|
||||
kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
|
||||
kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1
|
||||
kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 50
|
||||
kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 0.2
|
||||
kalman.statePre = np.zeros((4, 1), np.float32)
|
||||
kalman.statePost = np.zeros((4, 1), np.float32)
|
||||
fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=camera_index)
|
||||
|
||||
root = tk.Tk()
|
||||
screen_width = root.winfo_screenwidth()
|
||||
screen_height = root.winfo_screenheight()
|
||||
root.destroy()
|
||||
screen_width, screen_height = get_screen_size()
|
||||
|
||||
cap = cv2.VideoCapture(camera_index)
|
||||
if not cap.isOpened():
|
||||
@@ -71,15 +67,15 @@ def main():
|
||||
|
||||
gaze_history = []
|
||||
time_window = 0.5
|
||||
prev_time = time.time()
|
||||
mask_prev = None
|
||||
mask_next = None
|
||||
mask_prev = mask_next = None
|
||||
blend_alpha = 1.0
|
||||
contours_cache = []
|
||||
last_kde_x_pred = None
|
||||
last_kde_y_pred = None
|
||||
last_kde_x_pred = last_kde_y_pred = None
|
||||
frame_count = 0
|
||||
|
||||
BORDER = 2
|
||||
MARGIN = 20
|
||||
|
||||
with pyvirtualcam.Camera(
|
||||
width=screen_width,
|
||||
height=screen_height,
|
||||
@@ -94,49 +90,48 @@ def main():
|
||||
continue
|
||||
|
||||
features, blink_detected = gaze_estimator.extract_features(frame)
|
||||
x_pred, y_pred = None, None
|
||||
x_pred = y_pred = None
|
||||
|
||||
if features is not None and not blink_detected:
|
||||
gaze_point = gaze_estimator.predict(np.array([features]))[0]
|
||||
x, y = int(gaze_point[0]), int(gaze_point[1])
|
||||
x, y = map(int, gaze_point)
|
||||
|
||||
if kalman and filter_method == "kalman":
|
||||
prediction = kalman.predict()
|
||||
x_pred = int(prediction[0][0])
|
||||
y_pred = int(prediction[1][0])
|
||||
x_pred, y_pred = map(int, prediction[:2, 0])
|
||||
x_pred = max(0, min(x_pred, screen_width - 1))
|
||||
y_pred = max(0, min(y_pred, screen_height - 1))
|
||||
measurement = np.array([[np.float32(x)], [np.float32(y)]])
|
||||
if np.count_nonzero(kalman.statePre) == 0:
|
||||
if not np.any(kalman.statePre):
|
||||
kalman.statePre[:2] = measurement
|
||||
kalman.statePost[:2] = measurement
|
||||
kalman.correct(measurement)
|
||||
|
||||
elif filter_method == "kde":
|
||||
current_time = time.time()
|
||||
gaze_history.append((current_time, x, y))
|
||||
now = time.time()
|
||||
gaze_history.append((now, x, y))
|
||||
gaze_history = [
|
||||
(t, gx, gy)
|
||||
for (t, gx, gy) in gaze_history
|
||||
if current_time - t <= time_window
|
||||
if now - t <= time_window
|
||||
]
|
||||
if len(gaze_history) > 1 and frame_count % 5 == 0:
|
||||
arr = np.array([[gx, gy] for (_, gx, gy) in gaze_history])
|
||||
arr = np.array([(gx, gy) for (_, gx, gy) in gaze_history])
|
||||
try:
|
||||
kde = gaussian_kde(arr.T)
|
||||
xi, yi = np.mgrid[0:screen_width:200j, 0:screen_height:120j]
|
||||
coords = np.vstack([xi.ravel(), yi.ravel()])
|
||||
zi = kde(coords).reshape(xi.shape).T
|
||||
zi_flat = zi.flatten()
|
||||
sort_idx = np.argsort(zi_flat)[::-1]
|
||||
zi_sorted = zi_flat[sort_idx]
|
||||
cumsum = np.cumsum(zi_sorted)
|
||||
cumsum /= cumsum[-1]
|
||||
idx = np.searchsorted(cumsum, confidence_level)
|
||||
if idx >= len(zi_sorted):
|
||||
idx = len(zi_sorted) - 1
|
||||
threshold = zi_sorted[idx]
|
||||
mask_new = np.where(zi >= threshold, 1, 0).astype(np.uint8)
|
||||
zi = (
|
||||
kde(np.vstack([xi.ravel(), yi.ravel()]))
|
||||
.reshape(xi.shape)
|
||||
.T
|
||||
)
|
||||
flat = zi.ravel()
|
||||
idx = np.argsort(flat)[::-1]
|
||||
cdf = np.cumsum(flat[idx]) / flat.sum()
|
||||
threshold = flat[
|
||||
idx[np.searchsorted(cdf, confidence_level)]
|
||||
]
|
||||
mask_new = (zi >= threshold).astype(np.uint8)
|
||||
mask_new = cv2.resize(
|
||||
mask_new, (screen_width, screen_height)
|
||||
)
|
||||
@@ -169,7 +164,7 @@ def main():
|
||||
and mask_next is not None
|
||||
):
|
||||
blend_alpha = min(blend_alpha + 0.2, 1.0)
|
||||
blended_mask = cv2.addWeighted(
|
||||
blended = cv2.addWeighted(
|
||||
mask_prev.astype(np.float32),
|
||||
1.0 - blend_alpha,
|
||||
mask_next.astype(np.float32),
|
||||
@@ -177,10 +172,10 @@ def main():
|
||||
0,
|
||||
).astype(np.uint8)
|
||||
kernel2 = np.ones((5, 5), np.uint8)
|
||||
blended_mask = cv2.morphologyEx(blended_mask, cv2.MORPH_OPEN, kernel2)
|
||||
blended_mask = cv2.morphologyEx(blended_mask, cv2.MORPH_CLOSE, kernel2)
|
||||
blended = cv2.morphologyEx(blended, cv2.MORPH_OPEN, kernel2)
|
||||
blended = cv2.morphologyEx(blended, cv2.MORPH_CLOSE, kernel2)
|
||||
contours, _ = cv2.findContours(
|
||||
blended_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS
|
||||
blended, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS
|
||||
)
|
||||
contours_cache = contours
|
||||
if x_pred is not None and y_pred is not None:
|
||||
@@ -192,6 +187,19 @@ def main():
|
||||
if filter_method != "kde" and x_pred is not None and y_pred is not None:
|
||||
cv2.circle(output, (x_pred, y_pred), 10, (0, 0, 255), -1)
|
||||
|
||||
small = cv2.resize(frame, (cam_width, cam_height))
|
||||
thumb = cv2.copyMakeBorder(
|
||||
small,
|
||||
BORDER,
|
||||
BORDER,
|
||||
BORDER,
|
||||
BORDER,
|
||||
cv2.BORDER_CONSTANT,
|
||||
value=(255, 255, 255),
|
||||
)
|
||||
h, w = thumb.shape[:2]
|
||||
output[-h - MARGIN : -MARGIN, -w - MARGIN : -MARGIN] = thumb
|
||||
|
||||
cam.send(output)
|
||||
cam.sleep_until_next_frame()
|
||||
frame_count += 1
|
||||
@@ -201,4 +209,4 @@ def main():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
run_virtualcam()
|
||||
13
src/eyetrax/calibration/__init__.py
Normal file
13
src/eyetrax/calibration/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from .common import wait_for_face_and_countdown
|
||||
from .nine_point import run_9_point_calibration
|
||||
from .five_point import run_5_point_calibration
|
||||
from .lissajous import run_lissajous_calibration
|
||||
from .fine_tune import fine_tune_kalman_filter
|
||||
|
||||
__all__ = [
|
||||
"wait_for_face_and_countdown",
|
||||
"run_9_point_calibration",
|
||||
"run_5_point_calibration",
|
||||
"run_lissajous_calibration",
|
||||
"fine_tune_kalman_filter",
|
||||
]
|
||||
56
src/eyetrax/calibration/common.py
Normal file
56
src/eyetrax/calibration/common.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur: int = 2) -> bool:
    """
    Waits for a face to be detected (not blinking), then shows a countdown ellipse.

    A fullscreen "Calibration" window is created and redrawn for every frame
    read from ``cap``. While ``gaze_estimator.extract_features`` reports
    features without a blink, a filled green pie shrinks at the screen centre;
    as soon as the face is lost (or a blink is reported) the countdown is
    reset and "Face not detected" is shown instead.

    Args:
        cap: Capture object exposing ``read()`` returning ``(ok, frame)``.
        gaze_estimator: Object exposing ``extract_features(frame)`` returning
            ``(features, blink)``.
        sw: Canvas width in pixels.
        sh: Canvas height in pixels.
        dur: Seconds the face must stay continuously detected.

    Returns:
        True once the face has been detected for ``dur`` seconds without
        interruption, False if Esc (key code 27) is pressed first.
    """
    cv2.namedWindow("Calibration", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("Calibration", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # Timestamp of the first frame of the current uninterrupted detection
    # streak; None means no countdown is running.
    fd_start = None

    while True:
        ret, frame = cap.read()
        if not ret:
            # Still poll the keyboard so Esc can abort when the camera
            # delivers no frames; otherwise this loop could never exit.
            if cv2.waitKey(1) == 27:
                return False
            continue

        f, blink = gaze_estimator.extract_features(frame)
        face = f is not None and not blink
        canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
        now = time.time()

        if face:
            if fd_start is None:
                fd_start = now
            elapsed = now - fd_start
            if elapsed >= dur:
                return True
            # Smoothstep easing so the pie closes slowly at both ends.
            t = elapsed / dur
            e = t * t * (3 - 2 * t)
            ang = 360 * (1 - e)
            cv2.ellipse(
                canvas,
                (sw // 2, sh // 2),
                (50, 50),
                0,
                -90,
                -90 + ang,
                (0, 255, 0),
                -1,
            )
        else:
            # Any interruption restarts the countdown from zero.
            fd_start = None
            txt = "Face not detected"
            fs = 2
            thick = 3
            size, _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, fs, thick)
            tx = (sw - size[0]) // 2
            ty = (sh + size[1]) // 2
            cv2.putText(
                canvas, txt, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, fs, (0, 0, 255), thick
            )

        cv2.imshow("Calibration", canvas)
        if cv2.waitKey(1) == 27:
            return False
|
||||
125
src/eyetrax/calibration/fine_tune.py
Normal file
125
src/eyetrax/calibration/fine_tune.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from eyetrax.utils.screen import get_screen_size
|
||||
|
||||
|
||||
def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index: int = 0):
    """
    Quick fine-tuning pass to adjust Kalman filter's measurementNoiseCov.

    Three green targets are shown in a fullscreen "Fine Tuning" window. When
    the predicted gaze stays within ``screen_width / 5`` pixels of a target
    for 0.5 s, gaze predictions are recorded for a further 0.5 s while the
    target jitters, after which the target disappears. Once all targets are
    gone, the per-axis variance of every recorded prediction becomes the
    diagonal of ``kalman.measurementNoiseCov``.

    Args:
        gaze_estimator: Object exposing ``extract_features(frame)`` and
            ``predict(features)``.
        kalman: Filter object whose ``measurementNoiseCov`` attribute is
            overwritten on success.
        camera_index: Index passed to ``cv2.VideoCapture``.

    Returns:
        None. ``kalman`` is left untouched if Esc is pressed or fewer than two
        gaze samples were collected.
    """
    screen_width, screen_height = get_screen_size()

    points_tpl = [
        (screen_width // 2, screen_height // 4),
        (screen_width // 4, 3 * screen_height // 4),
        (3 * screen_width // 4, 3 * screen_height // 4),
    ]

    points = [
        dict(
            position=pos,
            start_time=None,
            data_collection_started=False,
            collection_start_time=None,
            collected_gaze=[],
        )
        for pos in points_tpl
    ]

    proximity_threshold = screen_width / 5
    initial_delay = 0.5
    data_collection_duration = 0.5

    # The instruction text never changes, so measure it once up front.
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = "Look at the points until they disappear"
    size, _ = cv2.getTextSize(text, font, 1.5, 2)
    text_origin = ((screen_width - size[0]) // 2, screen_height - 50)

    cv2.namedWindow("Fine Tuning", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("Fine Tuning", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    cap = cv2.VideoCapture(camera_index)
    gaze_positions = []

    # try/finally guarantees the camera and window are released on every exit
    # path, including exceptions raised by the estimator or by OpenCV.
    try:
        while points:
            ret, frame = cap.read()
            if not ret:
                # Keep the keyboard responsive so Esc can abort even when the
                # camera delivers no frames.
                if cv2.waitKey(1) == 27:
                    return
                continue

            features, blink_detected = gaze_estimator.extract_features(frame)
            canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)

            for point in points:
                cv2.circle(canvas, point["position"], 20, (0, 255, 0), -1)

            cv2.putText(canvas, text, text_origin, font, 1.5, (255, 255, 255), 2)

            now = time.time()

            if features is not None and not blink_detected:
                gaze_point = gaze_estimator.predict(np.array([features]))[0]
                gaze_x, gaze_y = map(int, gaze_point)
                cv2.circle(canvas, (gaze_x, gaze_y), 10, (255, 0, 0), -1)

                # Iterate over a copy: finished targets are removed in-loop.
                for point in points[:]:
                    dx, dy = gaze_x - point["position"][0], gaze_y - point["position"][1]
                    if np.hypot(dx, dy) <= proximity_threshold:
                        if point["start_time"] is None:
                            point["start_time"] = now
                        elapsed = now - point["start_time"]

                        if (
                            not point["data_collection_started"]
                            and elapsed >= initial_delay
                        ):
                            point["data_collection_started"] = True
                            point["collection_start_time"] = now

                        if point["data_collection_started"]:
                            data_elapsed = now - point["collection_start_time"]
                            point["collected_gaze"].append([gaze_x, gaze_y])
                            # Jitter amplitude grows as collection progresses,
                            # giving visual feedback that sampling is under way.
                            shake = int(5 + (data_elapsed / data_collection_duration) * 20)
                            shaken = (
                                point["position"][0]
                                + int(np.random.uniform(-shake, shake)),
                                point["position"][1]
                                + int(np.random.uniform(-shake, shake)),
                            )
                            cv2.circle(canvas, shaken, 20, (0, 255, 0), -1)
                            if data_elapsed >= data_collection_duration:
                                gaze_positions.extend(point["collected_gaze"])
                                points.remove(point)
                        else:
                            # Gaze is close but the initial delay has not
                            # elapsed yet: outline the target.
                            cv2.circle(canvas, point["position"], 25, (0, 255, 255), 2)
                    else:
                        # Gaze left the target: discard partial progress.
                        point.update(
                            start_time=None,
                            data_collection_started=False,
                            collection_start_time=None,
                            collected_gaze=[],
                        )

            cv2.imshow("Fine Tuning", canvas)
            if cv2.waitKey(1) == 27:
                return
    finally:
        cap.release()
        cv2.destroyWindow("Fine Tuning")

    gaze_positions = np.array(gaze_positions)
    if gaze_positions.shape[0] < 2:
        return

    var = np.var(gaze_positions, axis=0)
    # A zero variance is replaced with a small positive value.
    var[var == 0] = 1e-4
    kalman.measurementNoiseCov = np.array([[var[0], 0], [0, var[1]]], dtype=np.float32)
|
||||
77
src/eyetrax/calibration/five_point.py
Normal file
77
src/eyetrax/calibration/five_point.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from eyetrax.utils.screen import get_screen_size
|
||||
from eyetrax.calibration.common import wait_for_face_and_countdown
|
||||
|
||||
|
||||
def run_5_point_calibration(gaze_estimator, camera_index: int = 0):
    """
    Faster five-point calibration

    After ``wait_for_face_and_countdown`` succeeds, five targets are shown in
    turn (screen centre first, then the four corners of a grid inset 10% from
    every screen edge). Each target pulses for one second, then a one-second
    countdown ring is drawn around it while features are extracted from every
    captured frame. Non-blink samples are paired with the target position and
    passed to ``gaze_estimator.train`` at the end.

    Args:
        gaze_estimator: Object exposing ``extract_features(frame)`` and
            ``train(X, y)``.
        camera_index: Index passed to ``cv2.VideoCapture``.

    Returns:
        None. Nothing is trained if Esc is pressed at any stage or no usable
        sample was collected.
    """
    sw, sh = get_screen_size()
    feats, targs = [], []

    cap = cv2.VideoCapture(camera_index)
    # try/finally releases the camera and closes the windows on every exit
    # path (normal completion, Esc, or an exception mid-calibration).
    try:
        if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
            return

        mx, my = int(sw * 0.1), int(sh * 0.1)
        gw, gh = sw - 2 * mx, sh - 2 * my
        # (row, col) cells of a 3x3 grid.
        order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2)]
        pts = [(mx + int(c * (gw / 2)), my + int(r * (gh / 2))) for (r, c) in order]

        pulse_d, cd_d = 1.0, 1.0

        for x, y in pts:
            # Phase 1: pulsing dot to attract the eye; no samples are taken.
            ps = time.time()
            final_radius = 20
            while True:
                e = time.time() - ps
                if e > pulse_d:
                    break
                ok, frame = cap.read()
                if not ok:
                    continue
                canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
                # Remember the last drawn size so the dot does not jump when
                # the countdown phase takes over.
                final_radius = radius
                cv2.circle(canvas, (x, y), radius, (0, 255, 0), -1)
                cv2.imshow("Calibration", canvas)
                if cv2.waitKey(1) == 27:
                    return

            # Phase 2: static dot with a shrinking ring; sample every frame.
            cs = time.time()
            while True:
                e = time.time() - cs
                if e > cd_d:
                    break
                ok, frame = cap.read()
                if not ok:
                    continue
                canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
                cv2.circle(canvas, (x, y), final_radius, (0, 255, 0), -1)
                t = e / cd_d
                ease = t * t * (3 - 2 * t)
                ang = 360 * (1 - ease)
                cv2.ellipse(
                    canvas, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4
                )
                cv2.imshow("Calibration", canvas)
                if cv2.waitKey(1) == 27:
                    return
                ft, blink = gaze_estimator.extract_features(frame)
                if ft is not None and not blink:
                    feats.append(ft)
                    targs.append([x, y])
    finally:
        cap.release()
        cv2.destroyAllWindows()

    if feats:
        gaze_estimator.train(np.array(feats), np.array(targs))
|
||||
61
src/eyetrax/calibration/lissajous.py
Normal file
61
src/eyetrax/calibration/lissajous.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from eyetrax.utils.screen import get_screen_size
|
||||
from eyetrax.calibration.common import wait_for_face_and_countdown
|
||||
|
||||
|
||||
def run_lissajous_calibration(gaze_estimator, camera_index: int = 0):
    """
    Moves a calibration point along a Lissajous curve

    After ``wait_for_face_and_countdown`` succeeds, a green dot traces one
    full period of a 3:2 Lissajous figure spanning 80% of the screen in each
    direction. The dot is advanced once per loop iteration for 300 iterations,
    moving more slowly near the start and end of the run. Features are
    extracted from every frame and non-blink samples are paired with the dot
    position for ``gaze_estimator.train``.

    Args:
        gaze_estimator: Object exposing ``extract_features(frame)`` and
            ``train(X, y)``.
        camera_index: Index passed to ``cv2.VideoCapture``.

    Returns:
        None. Pressing Esc during the sweep stops it early, but samples
        collected so far are still used for training; pressing Esc during the
        initial face wait aborts without training.
    """
    sw, sh = get_screen_size()
    feats, targs = [], []

    cap = cv2.VideoCapture(camera_index)
    # try/finally releases the camera and closes the windows on every exit
    # path, including exceptions raised while sampling.
    try:
        if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
            return

        A, B, a, b, d = sw * 0.4, sh * 0.4, 3, 2, 0

        def curve(t):
            return (A * np.sin(a * t + d) + sw / 2, B * np.sin(b * t) + sh / 2)

        total_time = 5.0
        fps = 60
        frames = int(total_time * fps)

        # Per-iteration progress increments: speed ramps 0.3 -> 1.0 -> 0.3
        # along a half sine. Computed once and reused for both the
        # normalising total and the running position.
        steps = [
            (0.3 + 0.7 * np.sin(np.pi * (i / (frames - 1)))) / fps
            for i in range(frames)
        ]

        acc = 0
        for step in steps:
            acc += step
        # Guard against division by a vanishing total.
        end = acc if acc >= 1e-6 else 1e-6

        acc = 0
        for step in steps:
            acc += step
            # Map normalised progress onto one full period of the curve.
            t = (acc / end) * (2 * np.pi)
            ret, frame = cap.read()
            if not ret:
                continue
            x, y = curve(t)
            canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
            cv2.circle(canvas, (int(x), int(y)), 20, (0, 255, 0), -1)
            cv2.imshow("Calibration", canvas)
            if cv2.waitKey(1) == 27:
                break
            ft, blink = gaze_estimator.extract_features(frame)
            if ft is not None and not blink:
                feats.append(ft)
                targs.append([x, y])
    finally:
        cap.release()
        cv2.destroyAllWindows()

    if feats:
        gaze_estimator.train(np.array(feats), np.array(targs))
|
||||
77
src/eyetrax/calibration/nine_point.py
Normal file
77
src/eyetrax/calibration/nine_point.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import time
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from eyetrax.utils.screen import get_screen_size
|
||||
from eyetrax.calibration.common import wait_for_face_and_countdown
|
||||
|
||||
|
||||
def run_9_point_calibration(gaze_estimator, camera_index: int = 0):
    """
    Standard nine-point calibration

    After ``wait_for_face_and_countdown`` succeeds, nine targets on a 3x3 grid
    inset 10% from every screen edge are shown in turn (centre, the four
    corners, then the four edge midpoints). Each target pulses for one second,
    then a one-second countdown ring is drawn around it while features are
    extracted from every captured frame. Non-blink samples are paired with the
    target position and passed to ``gaze_estimator.train`` at the end.

    Args:
        gaze_estimator: Object exposing ``extract_features(frame)`` and
            ``train(X, y)``.
        camera_index: Index passed to ``cv2.VideoCapture``.

    Returns:
        None. Nothing is trained if Esc is pressed at any stage or no usable
        sample was collected.
    """
    sw, sh = get_screen_size()
    feats, targs = [], []

    cap = cv2.VideoCapture(camera_index)
    # try/finally releases the camera and closes the windows on every exit
    # path (normal completion, Esc, or an exception mid-calibration).
    try:
        if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
            return

        mx, my = int(sw * 0.1), int(sh * 0.1)
        gw, gh = sw - 2 * mx, sh - 2 * my
        # (row, col) cells of a 3x3 grid.
        order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0), (0, 1), (2, 1), (1, 2)]
        pts = [(mx + int(c * (gw / 2)), my + int(r * (gh / 2))) for (r, c) in order]

        pulse_d, cd_d = 1.0, 1.0

        for x, y in pts:
            # Phase 1: pulsing dot to attract the eye; no samples are taken.
            ps = time.time()
            final_radius = 20
            while True:
                e = time.time() - ps
                if e > pulse_d:
                    break
                ok, frame = cap.read()
                if not ok:
                    continue
                canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
                # Remember the last drawn size so the dot does not jump when
                # the countdown phase takes over.
                final_radius = radius
                cv2.circle(canvas, (x, y), radius, (0, 255, 0), -1)
                cv2.imshow("Calibration", canvas)
                if cv2.waitKey(1) == 27:
                    return

            # Phase 2: static dot with a shrinking ring; sample every frame.
            cs = time.time()
            while True:
                e = time.time() - cs
                if e > cd_d:
                    break
                ok, frame = cap.read()
                if not ok:
                    continue
                canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
                cv2.circle(canvas, (x, y), final_radius, (0, 255, 0), -1)
                t = e / cd_d
                ease = t * t * (3 - 2 * t)
                ang = 360 * (1 - ease)
                cv2.ellipse(
                    canvas, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4
                )
                cv2.imshow("Calibration", canvas)
                if cv2.waitKey(1) == 27:
                    return
                ft, blink = gaze_estimator.extract_features(frame)
                if ft is not None and not blink:
                    feats.append(ft)
                    targs.append([x, y])
    finally:
        cap.release()
        cv2.destroyAllWindows()

    if feats:
        gaze_estimator.train(np.array(feats), np.array(targs))
|
||||
@@ -63,15 +63,15 @@ class GazeEstimator:
|
||||
]
|
||||
|
||||
mutual_indices = [
|
||||
4, # Nose
|
||||
4, # Nose
|
||||
10, # Very top
|
||||
151, # Forehead
|
||||
9, # Between brow
|
||||
152, # Chin
|
||||
234, # Very left
|
||||
454, # Very right
|
||||
151, # Forehead
|
||||
9, # Between brow
|
||||
152, # Chin
|
||||
234, # Very left
|
||||
454, # Very right
|
||||
58, # Left jaw
|
||||
288, # Right jaw
|
||||
288, # Right jaw
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
@@ -139,7 +139,6 @@ class GazeEstimator:
|
||||
Trains gaze prediction model
|
||||
"""
|
||||
self.variable_scaling = variable_scaling
|
||||
|
||||
X_scaled = self.scaler.fit_transform(X)
|
||||
if self.variable_scaling is not None:
|
||||
X_scaled *= self.variable_scaling
|
||||
6
src/eyetrax/utils/screen.py
Normal file
6
src/eyetrax/utils/screen.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from screeninfo import get_monitors
|
||||
|
||||
|
||||
def get_screen_size():
    """Return ``(width, height)`` of the first monitor listed by screeninfo."""
    first_monitor = get_monitors()[0]
    width = first_monitor.width
    height = first_monitor.height
    return width, height
|
||||
Reference in New Issue
Block a user