Better calibration

2026-02-18 10:49:59 -06:00 · 2025-02-27 11:26:09 +08:00
parent 9414d3c1bb
commit 476e6c24f3
2 changed files with 255 additions and 116 deletions
--- a/calibration.py
+++ b/calibration.py
@@ -5,129 +5,253 @@ import time
 from gaze_estimator import GazeEstimator


-def run_calibration(gaze_estimator, camera_index=0):
-    root = tk.Tk()
-    screen_width = root.winfo_screenwidth()
-    screen_height = root.winfo_screenheight()
-    root.destroy()
-
-    A = screen_width * 0.4
-    B = screen_height * 0.4
-    a = 3
-    b = 2
-    delta = 0
-
-    total_time = 5
-    fps = 60
-    total_frames = int(total_time * fps)
-
+def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur=2):
+    """
+    Waits until a non-blinking face has been seen continuously for `dur` seconds while drawing a shrinking filled arc; returns True when done, False if Esc is pressed.
+    """
    cv2.namedWindow("Calibration", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("Calibration", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
-
-    cap = cv2.VideoCapture(camera_index)
-
-    features_list = []
-    targets_list = []
-
-    def lissajous_curve(t, A, B, a, b, delta):
-        x = A * np.sin(a * t + delta) + screen_width / 2
-        y = B * np.sin(b * t) + screen_height / 2
-        return x, y
-
-    face_detected = False
-    countdown_active = False
-    face_detection_start_time = None
-    countdown_duration = 2
-
+    fd_start = None
+    countdown = False
    while True:
        ret, frame = cap.read()
        if not ret:
            continue
-
-        features, blink_detected = gaze_estimator.extract_features(frame)
-        if features is not None and not blink_detected:
-            face_detected = True
-        else:
-            face_detected = False
-
-        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
-
-        current_time = time.time()
-
-        if face_detected:
-            if not countdown_active:
-                face_detection_start_time = current_time
-                countdown_active = True
-            elapsed_time = current_time - face_detection_start_time
-            if elapsed_time >= countdown_duration:
-                countdown_active = False
-                break
-            else:
-                t = elapsed_time / countdown_duration
-                eased_t = t * t * (3 - 2 * t)
-                angle = 360 * (1 - eased_t)
-                center = (screen_width // 2, screen_height // 2)
-                radius = 50
-                axes = (radius, radius)
-                start_angle = -90
-                end_angle = start_angle + angle
-                color = (0, 255, 0)
-                thickness = -1
-                cv2.ellipse(
-                    canvas, center, axes, 0, start_angle, end_angle, color, thickness
-                )
-        else:
-            countdown_active = False
-            face_detection_start_time = None
-            text = "Face not detected"
-            font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 2
-            color = (0, 0, 255)
-            thickness = 3
-            text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
-            text_x = (screen_width - text_size[0]) // 2
-            text_y = (screen_height + text_size[1]) // 2
-            cv2.putText(
-                canvas, text, (text_x, text_y), font, font_scale, color, thickness
+        f, blink = gaze_estimator.extract_features(frame)
+        face = f is not None and not blink
+        c = np.zeros((sh, sw, 3), dtype=np.uint8)
+        now = time.time()
+        if face:
+            if not countdown:
+                fd_start = now
+                countdown = True
+            elapsed = now - fd_start
+            if elapsed >= dur:
+                return True
+            t = elapsed / dur
+            e = t * t * (3 - 2 * t)
+            ang = 360 * (1 - e)
+            cv2.ellipse(
+                c, (sw // 2, sh // 2), (50, 50), 0, -90, -90 + ang, (0, 255, 0), -1
            )
-
-        cv2.imshow("Calibration", canvas)
+        else:
+            countdown = False
+            fd_start = None
+            txt = "Face not detected"
+            fs = 2
+            thick = 3
+            size, _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, fs, thick)
+            tx = (sw - size[0]) // 2
+            ty = (sh + size[1]) // 2
+            cv2.putText(
+                c, txt, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, fs, (0, 0, 255), thick
+            )
+        cv2.imshow("Calibration", c)
        if cv2.waitKey(1) == 27:
-            cap.release()
-            cv2.destroyWindow("Calibration")
-            return
+            return False

-    start_time = time.time()
-    for frame_idx in range(total_frames):
-        ret, frame = cap.read()
+
+def run_9_point_calibration(gaze_estimator, camera_index=0):
+    """
+    Standard 9-point calibration
+    """
+    root = tk.Tk()
+    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
+    root.destroy()
+    cap = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
+        cap.release()
+        cv2.destroyAllWindows()
+        return
+    mx, my = int(sw * 0.1), int(sh * 0.1)
+    gw, gh = sw - 2 * mx, sh - 2 * my
+    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0), (0, 1), (2, 1), (1, 2)]
+    pts = [(mx + int(c * (gw / 2)), my + int(r * (gh / 2))) for (r, c) in order]
+    feats, targs = [], []
+    pulse_d, cd_d = 1.0, 1.0
+    for cycle in range(2):
+        for x, y in pts:
+            ps = time.time()
+            final_radius = 20
+            while True:
+                e = time.time() - ps
+                if e > pulse_d:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
+                final_radius = radius
+                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+            cs = time.time()
+            while True:
+                e = time.time() - cs
+                if e > cd_d:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
+                t = e / cd_d
+                ease = t * t * (3 - 2 * t)
+                ang = 360 * (1 - ease)
+                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+                ft, blink = gaze_estimator.extract_features(f)
+                if ft is not None and not blink:
+                    feats.append(ft)
+                    targs.append([x, y])
+    cap.release()
+    cv2.destroyAllWindows()
+    if feats:
+        gaze_estimator.train(np.array(feats), np.array(targs))
+
+
+def run_5_point_calibration(gaze_estimator, camera_index=0):
+    """
+    Simpler 5-point calibration
+    """
+    root = tk.Tk()
+    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
+    root.destroy()
+    cap = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
+        cap.release()
+        cv2.destroyAllWindows()
+        return
+    m = 100
+    # (row, col) pairs: center, top-left, bottom-left, top-right, bottom-right
+    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2)]
+    pts = []
+    for r, c in order:
+        x = m if c == 0 else (sw - m if c == 2 else sw // 2)
+        y = m if r == 0 else (sh - m if r == 2 else sh // 2)
+        pts.append((x, y))
+    feats, targs = [], []
+    pd, cd = 1.0, 1.0
+    for cycle in range(2):
+        for x, y in pts:
+            ps = time.time()
+            final_radius = 20
+            while True:
+                e = time.time() - ps
+                if e > pd:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
+                final_radius = radius
+                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+            cs = time.time()
+            while True:
+                e = time.time() - cs
+                if e > cd:
+                    break
+                r, f = cap.read()
+                if not r:
+                    continue
+                c = np.zeros((sh, sw, 3), dtype=np.uint8)
+                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
+                t = e / cd
+                ease = t * t * (3 - 2 * t)
+                ang = 360 * (1 - ease)
+                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
+                cv2.imshow("Calibration", c)
+                if cv2.waitKey(1) == 27:
+                    cap.release()
+                    cv2.destroyAllWindows()
+                    return
+                ft, blink = gaze_estimator.extract_features(f)
+                if ft is not None and not blink:
+                    feats.append(ft)
+                    targs.append([x, y])
+    cap.release()
+    cv2.destroyAllWindows()
+    if feats:
+        gaze_estimator.train(np.array(feats), np.array(targs))
+
+
+def run_lissajous_calibration(gaze_estimator, camera_index=0):
+    """
+    Moves a calibration point along a Lissajous curve
+    """
+    root = tk.Tk()
+    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
+    root.destroy()
+    cap = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
+        cap.release()
+        cv2.destroyAllWindows()
+        return
+    A, B, a, b, d = sw * 0.4, sh * 0.4, 3, 2, 0
+
+    def curve(t):
+        return (A * np.sin(a * t + d) + sw / 2, B * np.sin(b * t) + sh / 2)
+
+    tt = 5.0
+    fps = 60
+    frames = int(tt * fps)
+    feats, targs = [], []
+    vals = []
+    acc = 0
+
+    # First pass: total the sinusoidal speed profile (slow at both ends, fastest mid-run) so the second pass can normalize the warped time to one 2*pi sweep
+    for i in range(frames):
+        frac = i / (frames - 1)
+        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
+        acc += spd / fps
+    end = acc
+    if end < 1e-6:
+        end = 1e-6
+    acc = 0
+
+    for i in range(frames):
+        frac = i / (frames - 1)
+        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
+        acc += spd / fps
+        t = (acc / end) * (2 * np.pi)
+        ret, f = cap.read()
        if not ret:
            continue
-
-        t = (time.time() - start_time) * (2 * np.pi / total_time)
-        x, y = lissajous_curve(t, A, B, a, b, delta)
-        x, y = int(x), int(y)
-
-        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
-        cv2.circle(canvas, (x, y), 20, (0, 255, 0), -1)
-
-        cv2.imshow("Calibration", canvas)
-        cv2.waitKey(1)
-
-        features, blink_detected = gaze_estimator.extract_features(frame)
-        if features is not None and not blink_detected:
-            features_list.append(features)
-            targets_list.append([x, y])
+        x, y = curve(t)
+        c = np.zeros((sh, sw, 3), dtype=np.uint8)
+        cv2.circle(c, (int(x), int(y)), 20, (0, 255, 0), -1)
+        cv2.imshow("Calibration", c)
+        if cv2.waitKey(1) == 27:
+            break
+        ft, blink = gaze_estimator.extract_features(f)
+        if ft is not None and not blink:
+            feats.append(ft)
+            targs.append([x, y])

    cap.release()
-    cv2.destroyWindow("Calibration")
-
-    X = np.array(features_list)
-    y = np.array(targets_list)
-
-    gaze_estimator.train(X, y)
+    cv2.destroyAllWindows()
+    if feats:
+        gaze_estimator.train(np.array(feats), np.array(targs))


 def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
+    """
+    Quick fine-tuning pass to adjust Kalman filter's measurementNoiseCov.
+    """
    root = tk.Tk()
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()
@@ -158,7 +282,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
    ]

    points = initial_points.copy()
-
    proximity_threshold = screen_width / 5
    initial_delay = 0.5
    data_collection_duration = 0.5
@@ -167,7 +290,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
    cv2.setWindowProperty("Fine Tuning", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    cap = cv2.VideoCapture(camera_index)
-
    gaze_positions = []

    while len(points) > 0:
@@ -225,7 +347,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
                            current_time - point["collection_start_time"]
                        )
                        point["collected_gaze"].append([gaze_x, gaze_y])
-
                        shake_amplitude = int(
                            5
                            + (data_collection_elapsed / data_collection_duration) * 20
@@ -237,8 +358,8 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
                            np.random.uniform(-shake_amplitude, shake_amplitude)
                        )
                        shaken_position = (
-                            int(point["position"][0] + shake_x),
-                            int(point["position"][1] + shake_y),
+                            point["position"][0] + shake_x,
+                            point["position"][1] + shake_y,
                        )
                        cv2.circle(canvas, shaken_position, 20, (0, 255, 0), -1)

@@ -274,7 +395,6 @@ def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):

    gaze_variance = np.var(gaze_positions, axis=0)
    gaze_variance[gaze_variance == 0] = 1e-4
-
    kalman.measurementNoiseCov = np.array(
        [[gaze_variance[0], 0], [0, gaze_variance[1]]], dtype=np.float32
    )
--- a/demo.py
+++ b/demo.py
@@ -5,7 +5,12 @@ import time
 import argparse
 import os
 from gaze_estimator import GazeEstimator
-from calibration import run_calibration, fine_tune_kalman_filter
+from calibration import (
+    run_9_point_calibration,
+    run_5_point_calibration,
+    run_lissajous_calibration,
+    fine_tune_kalman_filter,
+)
 from scipy.stats import gaussian_kde


@@ -20,6 +25,12 @@ def main():
        help="Filter method: kalman, kde, or none",
    )
    parser.add_argument("--camera", type=int, default=0, help="Camera index")
+    parser.add_argument(
+        "--calibration",
+        choices=["9p", "5p", "lissajous"],
+        default="9p",
+        help="Choose calibration method (9p, 5p, or lissajous).",
+    )
    parser.add_argument(
        "--background", type=str, default=None, help="Path to background image"
    )
@@ -33,18 +44,26 @@ def main():

    filter_method = args.filter
    camera_index = args.camera
+    calibration_method = args.calibration
    background_path = args.background
    confidence_level = args.confidence

    gaze_estimator = GazeEstimator()

-    run_calibration(gaze_estimator, camera_index=camera_index)
+    # Run the chosen calibration method (default 9p)
+    if calibration_method == "9p":
+        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
+    elif calibration_method == "5p":
+        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
+    else:
+        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)

    if filter_method == "kalman":
        kalman = cv2.KalmanFilter(4, 2)
        kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
        kalman.transitionMatrix = np.array(
-            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
+            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]],
+            np.float32,
        )
        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1