Mirror of https://github.com/ck-zhang/EyePy.git (synced 2025-12-31 00:10:06 -06:00)
Better calibration
calibration.py (346 lines changed)
@@ -5,129 +5,253 @@ import time
from gaze_estimator import GazeEstimator


def run_calibration(gaze_estimator, camera_index=0):
    root = tk.Tk()
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
    root.destroy()

    A = screen_width * 0.4
    B = screen_height * 0.4
    a = 3
    b = 2
    delta = 0

    total_time = 5
    fps = 60
    total_frames = int(total_time * fps)


def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur=2):
    """
    Waits for a face to be detected (not blinking), then does a countdown ellipse.
    """
    cv2.namedWindow("Calibration", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("Calibration", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    cap = cv2.VideoCapture(camera_index)

    features_list = []
    targets_list = []

    def lissajous_curve(t, A, B, a, b, delta):
        x = A * np.sin(a * t + delta) + screen_width / 2
        y = B * np.sin(b * t) + screen_height / 2
        return x, y

    face_detected = False
    countdown_active = False
    face_detection_start_time = None
    countdown_duration = 2

    fd_start = None
    countdown = False
    while True:
        ret, frame = cap.read()
        if not ret:
            continue

        features, blink_detected = gaze_estimator.extract_features(frame)
        if features is not None and not blink_detected:
            face_detected = True
        else:
            face_detected = False

        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)

        current_time = time.time()

        if face_detected:
            if not countdown_active:
                face_detection_start_time = current_time
                countdown_active = True
            elapsed_time = current_time - face_detection_start_time
            if elapsed_time >= countdown_duration:
                countdown_active = False
                break
            else:
                t = elapsed_time / countdown_duration
                eased_t = t * t * (3 - 2 * t)
                angle = 360 * (1 - eased_t)
                center = (screen_width // 2, screen_height // 2)
                radius = 50
                axes = (radius, radius)
                start_angle = -90
                end_angle = start_angle + angle
                color = (0, 255, 0)
                thickness = -1
                cv2.ellipse(
                    canvas, center, axes, 0, start_angle, end_angle, color, thickness
                )
        else:
            countdown_active = False
            face_detection_start_time = None
            text = "Face not detected"
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 2
            color = (0, 0, 255)
            thickness = 3
            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
            text_x = (screen_width - text_size[0]) // 2
            text_y = (screen_height + text_size[1]) // 2
            cv2.putText(
                canvas, text, (text_x, text_y), font, font_scale, color, thickness
        f, blink = gaze_estimator.extract_features(frame)
        face = f is not None and not blink
        c = np.zeros((sh, sw, 3), dtype=np.uint8)
        now = time.time()
        if face:
            if not countdown:
                fd_start = now
                countdown = True
            elapsed = now - fd_start
            if elapsed >= dur:
                return True
            t = elapsed / dur
            e = t * t * (3 - 2 * t)
            ang = 360 * (1 - e)
            cv2.ellipse(
                c, (sw // 2, sh // 2), (50, 50), 0, -90, -90 + ang, (0, 255, 0), -1
            )

        cv2.imshow("Calibration", canvas)
        else:
            countdown = False
            fd_start = None
            txt = "Face not detected"
            fs = 2
            thick = 3
            size, _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, fs, thick)
            tx = (sw - size[0]) // 2
            ty = (sh + size[1]) // 2
            cv2.putText(
                c, txt, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, fs, (0, 0, 255), thick
            )
        cv2.imshow("Calibration", c)
        if cv2.waitKey(1) == 27:
            cap.release()
            cv2.destroyWindow("Calibration")
            return
            return False
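The countdown ring above shrinks with a smoothstep curve rather than linearly: with t = elapsed / dur, the eased fraction is t * t * (3 - 2 * t), which has zero slope at both ends, so the arc starts and settles gently. A standalone sketch of the mapping (illustrative, not part of the commit):

    def smoothstep(t):
        # 0 -> 0, 0.5 -> 0.5, 1 -> 1, with zero slope at both ends
        return t * t * (3 - 2 * t)

    def arc_remaining(t):
        # angle handed to cv2.ellipse: a full 360-degree ring at t = 0,
        # nothing left as t approaches 1
        return 360 * (1 - smoothstep(t))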
    start_time = time.time()
    for frame_idx in range(total_frames):
        ret, frame = cap.read()


def run_9_point_calibration(gaze_estimator, camera_index=0):
    """
    Standard 9-point calibration
    """
    root = tk.Tk()
    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
    root.destroy()
    cap = cv2.VideoCapture(camera_index)
    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
        cap.release()
        cv2.destroyAllWindows()
        return
    mx, my = int(sw * 0.1), int(sh * 0.1)
    gw, gh = sw - 2 * mx, sh - 2 * my
    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0), (0, 1), (2, 1), (1, 2)]
    pts = [(mx + int(c * (gw / 2)), my + int(r * (gh / 2))) for (r, c) in order]
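For concreteness (illustrative numbers, not from the commit): on a 1920x1080 screen the 10 % margins give mx = 192, my = 108 and half-spans gw / 2 = 768, gh / 2 = 432, so the nine targets are visited as

    # (r, c) -> (x, y)
    # (1, 1) -> (960, 540)    center first
    # (0, 0) -> (192, 108)    top-left
    # (2, 0) -> (192, 972)    bottom-left
    # (0, 2) -> (1728, 108)   top-right
    # (2, 2) -> (1728, 972)   bottom-right
    # then the edge midpoints: (192, 540), (960, 108), (960, 972), (1728, 540)

Each target is shown twice (the outer cycle loop below), first as a pulsing dot and then with a one-second countdown ring while features are collected.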
    feats, targs = [], []
    pulse_d, cd_d = 1.0, 1.0
    for cycle in range(2):
        for x, y in pts:
            ps = time.time()
            final_radius = 20
            while True:
                e = time.time() - ps
                if e > pulse_d:
                    break
                r, f = cap.read()
                if not r:
                    continue
                c = np.zeros((sh, sw, 3), dtype=np.uint8)
                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
                final_radius = radius
                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
                cv2.imshow("Calibration", c)
                if cv2.waitKey(1) == 27:
                    cap.release()
                    cv2.destroyAllWindows()
                    return
            cs = time.time()
            while True:
                e = time.time() - cs
                if e > cd_d:
                    break
                r, f = cap.read()
                if not r:
                    continue
                c = np.zeros((sh, sw, 3), dtype=np.uint8)
                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
                t = e / cd_d
                ease = t * t * (3 - 2 * t)
                ang = 360 * (1 - ease)
                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
                cv2.imshow("Calibration", c)
                if cv2.waitKey(1) == 27:
                    cap.release()
                    cv2.destroyAllWindows()
                    return
                ft, blink = gaze_estimator.extract_features(f)
                if ft is not None and not blink:
                    feats.append(ft)
                    targs.append([x, y])
    cap.release()
    cv2.destroyAllWindows()
    if feats:
        gaze_estimator.train(np.array(feats), np.array(targs))


def run_5_point_calibration(gaze_estimator, camera_index=0):
    """
    Simpler 5-point calibration
    """
    root = tk.Tk()
    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
    root.destroy()
    cap = cv2.VideoCapture(camera_index)
    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
        cap.release()
        cv2.destroyAllWindows()
        return
    m = 100
    # center, top-left, top-right, bottom-left, bottom-right
    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2)]
    pts = []
    for r, c in order:
        x = m if c == 0 else (sw - m if c == 2 else sw // 2)
        y = m if r == 0 else (sh - m if r == 2 else sh // 2)
        pts.append((x, y))
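With the fixed 100 px margin, the five targets on the same illustrative 1920x1080 screen work out to (960, 540), (100, 100), (100, 980), (1820, 100) and (1820, 980): the center plus the four corners inset by m. The pulse-then-countdown collection loop below mirrors the 9-point version.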
    feats, targs = [], []
    pd, cd = 1.0, 1.0
    for cycle in range(2):
        for x, y in pts:
            ps = time.time()
            final_radius = 20
            while True:
                e = time.time() - ps
                if e > pd:
                    break
                r, f = cap.read()
                if not r:
                    continue
                c = np.zeros((sh, sw, 3), dtype=np.uint8)
                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
                final_radius = radius
                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
                cv2.imshow("Calibration", c)
                if cv2.waitKey(1) == 27:
                    cap.release()
                    cv2.destroyAllWindows()
                    return
            cs = time.time()
            while True:
                e = time.time() - cs
                if e > cd:
                    break
                r, f = cap.read()
                if not r:
                    continue
                c = np.zeros((sh, sw, 3), dtype=np.uint8)
                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
                t = e / cd
                ease = t * t * (3 - 2 * t)
                ang = 360 * (1 - ease)
                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
                cv2.imshow("Calibration", c)
                if cv2.waitKey(1) == 27:
                    cap.release()
                    cv2.destroyAllWindows()
                    return
                ft, blink = gaze_estimator.extract_features(f)
                if ft is not None and not blink:
                    feats.append(ft)
                    targs.append([x, y])
    cap.release()
    cv2.destroyAllWindows()
    if feats:
        gaze_estimator.train(np.array(feats), np.array(targs))


def run_lissajous_calibration(gaze_estimator, camera_index=0):
    """
    Moves a calibration point in a Lissajous curve
    """
    root = tk.Tk()
    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
    root.destroy()
    cap = cv2.VideoCapture(camera_index)
    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
        cap.release()
        cv2.destroyAllWindows()
        return
    A, B, a, b, d = sw * 0.4, sh * 0.4, 3, 2, 0

    def curve(t):
        return (A * np.sin(a * t + d) + sw / 2, B * np.sin(b * t) + sh / 2)
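With A = 0.4 * sw, B = 0.4 * sh, a = 3, b = 2 and d = 0, the target traces a 3:2 Lissajous figure centered on the screen that never comes closer than 10 % of the width/height to any edge. A standalone sketch of the path (assumed 1920x1080 screen, not part of the commit):

    import numpy as np

    sw, sh = 1920, 1080
    A, B, a, b, d = sw * 0.4, sh * 0.4, 3, 2, 0
    x = lambda t: A * np.sin(a * t + d) + sw / 2
    y = lambda t: B * np.sin(b * t) + sh / 2
    # t = 0       -> (960, 540)  screen center
    # t = pi / 2  -> (192, 540)  far left edge of the pattern
    # t = pi      -> (960, 540)  back through the center
    # one full sweep corresponds to t running over [0, 2 * pi]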
    tt = 5.0
    fps = 60
    frames = int(tt * fps)
    feats, targs = [], []
    vals = []
    acc = 0

    # Generate a time scale that speeds up / slows down sinusoidally
    for i in range(frames):
        frac = i / (frames - 1)
        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
        acc += spd / fps
    end = acc
    if end < 1e-6:
        end = 1e-6
    acc = 0
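The first pass only measures how far the warped clock travels: end holds the sum of spd / fps over all frames, and the second pass divides by it so that t still sweeps exactly [0, 2 * pi] while the dot moves slowest at the start and finish (spd = 0.3) and fastest mid-run (spd = 1.0 at frac = 0.5). A vectorized restatement of the same warp (illustrative, not part of the commit):

    import numpy as np

    frames, fps = 300, 60
    spd = 0.3 + 0.7 * np.sin(np.pi * np.linspace(0, 1, frames))
    acc = np.cumsum(spd / fps)
    t = acc / acc[-1] * (2 * np.pi)   # monotone, ends exactly at 2 * pi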
    for i in range(frames):
        frac = i / (frames - 1)
        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
        acc += spd / fps
        t = (acc / end) * (2 * np.pi)
        ret, f = cap.read()
        if not ret:
            continue

        t = (time.time() - start_time) * (2 * np.pi / total_time)
        x, y = lissajous_curve(t, A, B, a, b, delta)
        x, y = int(x), int(y)

        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
        cv2.circle(canvas, (x, y), 20, (0, 255, 0), -1)

        cv2.imshow("Calibration", canvas)
        cv2.waitKey(1)

        features, blink_detected = gaze_estimator.extract_features(frame)
        if features is not None and not blink_detected:
            features_list.append(features)
            targets_list.append([x, y])
        x, y = curve(t)
        c = np.zeros((sh, sw, 3), dtype=np.uint8)
        cv2.circle(c, (int(x), int(y)), 20, (0, 255, 0), -1)
        cv2.imshow("Calibration", c)
        if cv2.waitKey(1) == 27:
            break
        ft, blink = gaze_estimator.extract_features(f)
        if ft is not None and not blink:
            feats.append(ft)
            targs.append([x, y])

    cap.release()
    cv2.destroyWindow("Calibration")

    X = np.array(features_list)
    y = np.array(targets_list)

    gaze_estimator.train(X, y)
    cv2.destroyAllWindows()
    if feats:
        gaze_estimator.train(np.array(feats), np.array(targs))


def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
    """
    Quick fine-tuning pass to adjust Kalman filter's measurementNoiseCov.
    """
    root = tk.Tk()
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
@@ -158,7 +282,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
    ]

    points = initial_points.copy()

    proximity_threshold = screen_width / 5
    initial_delay = 0.5
    data_collection_duration = 0.5
@@ -167,7 +290,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
    cv2.setWindowProperty("Fine Tuning", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    cap = cv2.VideoCapture(camera_index)

    gaze_positions = []

    while len(points) > 0:
@@ -225,7 +347,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
                current_time - point["collection_start_time"]
            )
            point["collected_gaze"].append([gaze_x, gaze_y])

            shake_amplitude = int(
                5
                + (data_collection_elapsed / data_collection_duration) * 20
@@ -237,8 +358,8 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
                np.random.uniform(-shake_amplitude, shake_amplitude)
            )
            shaken_position = (
                int(point["position"][0] + shake_x),
                int(point["position"][1] + shake_y),
                point["position"][0] + shake_x,
                point["position"][1] + shake_y,
            )
            cv2.circle(canvas, shaken_position, 20, (0, 255, 0), -1)

@@ -274,7 +395,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):

    gaze_variance = np.var(gaze_positions, axis=0)
    gaze_variance[gaze_variance == 0] = 1e-4

    kalman.measurementNoiseCov = np.array(
        [[gaze_variance[0], 0], [0, gaze_variance[1]]], dtype=np.float32
    )
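Using the per-axis variance of the collected gaze samples as the measurement noise makes the Kalman filter user- and camera-specific: a jittery raw signal yields a larger covariance and therefore heavier smoothing, while the 1e-4 floor avoids a degenerate zero-noise model. Roughly (illustrative numbers): if the raw gaze scatters by about 30 px horizontally and 20 px vertically, gaze_variance is about [900, 400] and measurementNoiseCov becomes diag(900, 400).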
demo.py (25 lines changed)
@@ -5,7 +5,12 @@ import time
import argparse
import os
from gaze_estimator import GazeEstimator
from calibration import run_calibration, fine_tune_kalman_filter
from calibration import (
    run_9_point_calibration,
    run_5_point_calibration,
    run_lissajous_calibration,
    fine_tune_kalman_filter,
)
from scipy.stats import gaussian_kde


@@ -20,6 +25,12 @@ def main():
        help="Filter method: kalman, kde, or none",
    )
    parser.add_argument("--camera", type=int, default=0, help="Camera index")
    parser.add_argument(
        "--calibration",
        choices=["9p", "5p", "lissajous"],
        default="9p",
        help="Choose calibration method (9p, 5p, or lissajous).",
    )
    parser.add_argument(
        "--background", type=str, default=None, help="Path to background image"
    )
@@ -33,18 +44,26 @@ def main():

    filter_method = args.filter
    camera_index = args.camera
    calibration_method = args.calibration
    background_path = args.background
    confidence_level = args.confidence

    gaze_estimator = GazeEstimator()

    run_calibration(gaze_estimator, camera_index=camera_index)
    # Run the chosen calibration method (default 9p)
    if calibration_method == "9p":
        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
    elif calibration_method == "5p":
        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
    else:
        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)
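The calibration style is now picked at launch; assuming demo.py is run directly from the repository, invocations would look like

    python demo.py --calibration 9p --filter kalman
    python demo.py --calibration 5p --filter kde --camera 1
    python demo.py --calibration lissajous --filter none

with --background and --confidence available as before.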

    if filter_method == "kalman":
        kalman = cv2.KalmanFilter(4, 2)
        kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
        kalman.transitionMatrix = np.array(
            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]],
            np.float32,
        )
        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1
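This is the usual constant-velocity model: the 4-dimensional state is (x, y, vx, vy), the transition matrix adds one frame of velocity to the position, the measurement matrix picks out (x, y), and fine_tune_kalman_filter later replaces the identity measurementNoiseCov with the per-user variance measured above. A minimal sketch of how such a filter is driven each frame (gaze_x / gaze_y are assumed names, not taken from the diff):

    import numpy as np

    measurement = np.array([[np.float32(gaze_x)], [np.float32(gaze_y)]])
    kalman.predict()                         # propagate (x, y, vx, vy) one frame ahead
    estimate = kalman.correct(measurement)   # fuse the raw gaze sample
    smooth_x, smooth_y = float(estimate[0, 0]), float(estimate[1, 0])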