diff --git a/calibration.py b/calibration.py
index e786560..e2ff0e8 100644
--- a/calibration.py
+++ b/calibration.py
@@ -5,129 +5,253 @@ import time
 from gaze_estimator import GazeEstimator
 
 
-def run_calibration(gaze_estimator, camera_index=0):
-    root = tk.Tk()
-    screen_width = root.winfo_screenwidth()
-    screen_height = root.winfo_screenheight()
-    root.destroy()
-
-    A = screen_width * 0.4
-    B = screen_height * 0.4
-    a = 3
-    b = 2
-    delta = 0
-
-    total_time = 5
-    fps = 60
-    total_frames = int(total_time * fps)
-
+def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur=2):
+    """
+    Waits for a face to be detected (not blinking), then does a countdown ellipse.
+    """
     cv2.namedWindow("Calibration", cv2.WND_PROP_FULLSCREEN)
     cv2.setWindowProperty("Calibration", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
-
-    cap = cv2.VideoCapture(camera_index)
-
-    features_list = []
-    targets_list = []
-
-    def lissajous_curve(t, A, B, a, b, delta):
-        x = A * np.sin(a * t + delta) + screen_width / 2
-        y = B * np.sin(b * t) + screen_height / 2
-        return x, y
-
-    face_detected = False
-    countdown_active = False
-    face_detection_start_time = None
-    countdown_duration = 2
-
+    fd_start = None
+    countdown = False
     while True:
         ret, frame = cap.read()
         if not ret:
             continue
-
-        features, blink_detected = gaze_estimator.extract_features(frame)
-        if features is not None and not blink_detected:
-            face_detected = True
-        else:
-            face_detected = False
-
-        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
-
-        current_time = time.time()
-
-        if face_detected:
-            if not countdown_active:
-                face_detection_start_time = current_time
-                countdown_active = True
-            elapsed_time = current_time - face_detection_start_time
-            if elapsed_time >= countdown_duration:
-                countdown_active = False
-                break
-            else:
-                t = elapsed_time / countdown_duration
-                eased_t = t * t * (3 - 2 * t)
-                angle = 360 * (1 - eased_t)
-                center = (screen_width // 2, screen_height // 2)
-                radius = 50
-                axes = (radius, radius)
-                start_angle = -90
-                end_angle = start_angle + angle
-                color = (0, 255, 0)
-                thickness = -1
-                cv2.ellipse(
-                    canvas, center, axes, 0, start_angle, end_angle, color, thickness
-                )
-        else:
-            countdown_active = False
-            face_detection_start_time = None
-            text = "Face not detected"
-            font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 2
-            color = (0, 0, 255)
-            thickness = 3
-            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
-            text_x = (screen_width - text_size[0]) // 2
-            text_y = (screen_height + text_size[1]) // 2
-            cv2.putText(
-                canvas, text, (text_x, text_y), font, font_scale, color, thickness
+        f, blink = gaze_estimator.extract_features(frame)
+        face = f is not None and not blink
+        c = np.zeros((sh, sw, 3), dtype=np.uint8)
+        now = time.time()
+        if face:
+            if not countdown:
+                fd_start = now
+                countdown = True
+            elapsed = now - fd_start
+            if elapsed >= dur:
+                return True
+            t = elapsed / dur
+            e = t * t * (3 - 2 * t)
+            ang = 360 * (1 - e)
+            cv2.ellipse(
+                c, (sw // 2, sh // 2), (50, 50), 0, -90, -90 + ang, (0, 255, 0), -1
             )
-
-        cv2.imshow("Calibration", canvas)
+        else:
+            countdown = False
+            fd_start = None
+            txt = "Face not detected"
+            fs = 2
+            thick = 3
+            size, _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, fs, thick)
+            tx = (sw - size[0]) // 2
+            ty = (sh + size[1]) // 2
+            cv2.putText(
+                c, txt, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, fs, (0, 0, 255), thick
+            )
+        cv2.imshow("Calibration", c)
         if cv2.waitKey(1) == 27:
-            cap.release()
-            cv2.destroyWindow("Calibration")
-            return
+            return False
 
-    start_time = time.time()
-    for frame_idx in range(total_frames):
-        ret, frame = cap.read()
+
+def run_9_point_calibration(gaze_estimator, camera_index=0):
+    """
+    Standard 9-point calibration
+    """
+    root = tk.Tk()
+    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
+    root.destroy()
+    cap = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
+        cap.release()
+        cv2.destroyAllWindows()
+        return
+    mx, my = int(sw * 0.1), int(sh * 0.1)
+    gw, gh = sw - 2 * mx, sh - 2 * my
+    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0), (0, 1), (2, 1), (1, 2)]
+    pts = [(mx + int(c * (gw / 2)), my + int(r * (gh / 2))) for (r, c) in order]
+    feats, targs = [], []
+    pulse_d, cd_d = 1.0, 1.0
+    for cycle in range(2):
+        for x, y in pts:
+            ps = time.time()
+            final_radius = 20
+            while True:
+                e = time.time() - ps
+                if e > pulse_d:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
+                final_radius = radius
+                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+            cs = time.time()
+            while True:
+                e = time.time() - cs
+                if e > cd_d:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
+                t = e / cd_d
+                ease = t * t * (3 - 2 * t)
+                ang = 360 * (1 - ease)
+                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+                ft, blink = gaze_estimator.extract_features(f)
+                if ft is not None and not blink:
+                    feats.append(ft)
+                    targs.append([x, y])
+    cap.release()
+    cv2.destroyAllWindows()
+    if feats:
+        gaze_estimator.train(np.array(feats), np.array(targs))
+
+
+def run_5_point_calibration(gaze_estimator, camera_index=0):
+    """
+    Simpler 5-point calibration
+    """
+    root = tk.Tk()
+    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
+    root.destroy()
+    cap = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
+        cap.release()
+        cv2.destroyAllWindows()
+        return
+    m = 100
+    # center, top-left, top-right, bottom-left, bottom-right
+    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2)]
+    pts = []
+    for r, c in order:
+        x = m if c == 0 else (sw - m if c == 2 else sw // 2)
+        y = m if r == 0 else (sh - m if r == 2 else sh // 2)
+        pts.append((x, y))
+    feats, targs = [], []
+    pd, cd = 1.0, 1.0
+    for cycle in range(2):
+        for x, y in pts:
+            ps = time.time()
+            final_radius = 20
+            while True:
+                e = time.time() - ps
+                if e > pd:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
+                final_radius = radius
+                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+            cs = time.time()
+            while True:
+                e = time.time() - cs
+                if e > cd:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
+                t = e / cd
+                ease = t * t * (3 - 2 * t)
+                ang = 360 * (1 - ease)
+                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+                ft, blink = gaze_estimator.extract_features(f)
+                if ft is not None and not blink:
+                    feats.append(ft)
+                    targs.append([x, y])
+    cap.release()
+    cv2.destroyAllWindows()
+    if feats:
+        gaze_estimator.train(np.array(feats), np.array(targs))
+
+
+def run_lissajous_calibration(gaze_estimator, camera_index=0):
+    """
+    Moves a calibration point in a Lissajous curve
+    """
+    root = tk.Tk()
+    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
+    root.destroy()
+    cap = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
+        cap.release()
+        cv2.destroyAllWindows()
+        return
+    A, B, a, b, d = sw * 0.4, sh * 0.4, 3, 2, 0
+
+    def curve(t):
+        return (A * np.sin(a * t + d) + sw / 2, B * np.sin(b * t) + sh / 2)
+
+    tt = 5.0
+    fps = 60
+    frames = int(tt * fps)
+    feats, targs = [], []
+    vals = []
+    acc = 0
+
+    # Generate a time scale that speeds up / slows down sinusoidally
+    for i in range(frames):
+        frac = i / (frames - 1)
+        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
+        acc += spd / fps
+    end = acc
+    if end < 1e-6:
+        end = 1e-6
+    acc = 0
+
+    for i in range(frames):
+        frac = i / (frames - 1)
+        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
+        acc += spd / fps
+        t = (acc / end) * (2 * np.pi)
+        ret, f = cap.read()
         if not ret:
             continue
-
-        t = (time.time() - start_time) * (2 * np.pi / total_time)
-        x, y = lissajous_curve(t, A, B, a, b, delta)
-        x, y = int(x), int(y)
-
-        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
-        cv2.circle(canvas, (x, y), 20, (0, 255, 0), -1)
-
-        cv2.imshow("Calibration", canvas)
-        cv2.waitKey(1)
-
-        features, blink_detected = gaze_estimator.extract_features(frame)
-        if features is not None and not blink_detected:
-            features_list.append(features)
-            targets_list.append([x, y])
+        x, y = curve(t)
+        c = np.zeros((sh, sw, 3), dtype=np.uint8)
+        cv2.circle(c, (int(x), int(y)), 20, (0, 255, 0), -1)
+        cv2.imshow("Calibration", c)
+        if cv2.waitKey(1) == 27:
+            break
+        ft, blink = gaze_estimator.extract_features(f)
+        if ft is not None and not blink:
+            feats.append(ft)
+            targs.append([x, y])
 
     cap.release()
-    cv2.destroyWindow("Calibration")
-
-    X = np.array(features_list)
-    y = np.array(targets_list)
-
-    gaze_estimator.train(X, y)
+    cv2.destroyAllWindows()
+    if feats:
+        gaze_estimator.train(np.array(feats), np.array(targs))
 
 
 def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
+    """
+    Quick fine-tuning pass to adjust Kalman filter's measurementNoiseCov.
+    """
     root = tk.Tk()
     screen_width = root.winfo_screenwidth()
     screen_height = root.winfo_screenheight()
@@ -158,7 +282,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
     ]
 
     points = initial_points.copy()
-
     proximity_threshold = screen_width / 5
     initial_delay = 0.5
     data_collection_duration = 0.5
@@ -167,7 +290,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
     cv2.setWindowProperty("Fine Tuning", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
 
     cap = cv2.VideoCapture(camera_index)
-
    gaze_positions = []
 
     while len(points) > 0:
@@ -225,7 +347,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
                         current_time - point["collection_start_time"]
                     )
                     point["collected_gaze"].append([gaze_x, gaze_y])
-
                     shake_amplitude = int(
                         5 + (data_collection_elapsed / data_collection_duration) * 20
@@ -237,8 +358,8 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
                         np.random.uniform(-shake_amplitude, shake_amplitude)
                     )
                     shaken_position = (
-                        int(point["position"][0] + shake_x),
-                        int(point["position"][1] + shake_y),
+                        point["position"][0] + shake_x,
+                        point["position"][1] + shake_y,
                     )
 
                     cv2.circle(canvas, shaken_position, 20, (0, 255, 0), -1)
@@ -274,7 +395,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
     gaze_variance = np.var(gaze_positions, axis=0)
     gaze_variance[gaze_variance == 0] = 1e-4
-
     kalman.measurementNoiseCov = np.array(
         [[gaze_variance[0], 0], [0, gaze_variance[1]]], dtype=np.float32
     )
diff --git a/demo.py b/demo.py
index 64475b3..1d1473e 100644
--- a/demo.py
+++ b/demo.py
@@ -5,7 +5,12 @@ import time
 import argparse
 import os
 from gaze_estimator import GazeEstimator
-from calibration import run_calibration, fine_tune_kalman_filter
+from calibration import (
+    run_9_point_calibration,
+    run_5_point_calibration,
+    run_lissajous_calibration,
+    fine_tune_kalman_filter,
+)
 from scipy.stats import gaussian_kde
 
 
@@ -20,6 +25,12 @@ def main():
         help="Filter method: kalman, kde, or none",
     )
     parser.add_argument("--camera", type=int, default=0, help="Camera index")
+    parser.add_argument(
+        "--calibration",
+        choices=["9p", "5p", "lissajous"],
+        default="9p",
+        help="Choose calibration method (9p, 5p, or lissajous).",
+    )
     parser.add_argument(
         "--background", type=str, default=None, help="Path to background image"
     )
@@ -33,18 +44,26 @@ def main():
     filter_method = args.filter
     camera_index = args.camera
+    calibration_method = args.calibration
     background_path = args.background
     confidence_level = args.confidence
 
     gaze_estimator = GazeEstimator()
-    run_calibration(gaze_estimator, camera_index=camera_index)
+    # Run the chosen calibration method (default 9p)
+    if calibration_method == "9p":
+        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
+    elif calibration_method == "5p":
+        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
+    else:
+        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)
 
     if filter_method == "kalman":
         kalman = cv2.KalmanFilter(4, 2)
         kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
         kalman.transitionMatrix = np.array(
-            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
+            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]],
+            np.float32,
         )
         kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
         kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1