Refactor to library

2026-02-18 10:49:59 -06:00 · 2025-04-23 11:25:54 +08:00
parent cd156428e7
commit 77df3c8c2f
21 changed files with 1818 additions and 957 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
-shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.12
+>=3.9
--- a/init.py
+++ b/init.py
@@ -1 +0,0 @@
-from .gaze_estimator import GazeEstimator
--- a/calibration.py
+++ b/calibration.py
@@ -1,400 +0,0 @@
-import cv2
-import numpy as np
-import tkinter as tk
-import time
-from gaze_estimator import GazeEstimator
-
-
-def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur=2):
-    """
-    Waits for a face to be detected (not blinking), then does a countdown ellipse.
-    """
-    cv2.namedWindow("Calibration", cv2.WND_PROP_FULLSCREEN)
-    cv2.setWindowProperty("Calibration", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
-    fd_start = None
-    countdown = False
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            continue
-        f, blink = gaze_estimator.extract_features(frame)
-        face = f is not None and not blink
-        c = np.zeros((sh, sw, 3), dtype=np.uint8)
-        now = time.time()
-        if face:
-            if not countdown:
-                fd_start = now
-                countdown = True
-            elapsed = now - fd_start
-            if elapsed >= dur:
-                return True
-            t = elapsed / dur
-            e = t * t * (3 - 2 * t)
-            ang = 360 * (1 - e)
-            cv2.ellipse(
-                c, (sw // 2, sh // 2), (50, 50), 0, -90, -90 + ang, (0, 255, 0), -1
-            )
-        else:
-            countdown = False
-            fd_start = None
-            txt = "Face not detected"
-            fs = 2
-            thick = 3
-            size, _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, fs, thick)
-            tx = (sw - size[0]) // 2
-            ty = (sh + size[1]) // 2
-            cv2.putText(
-                c, txt, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, fs, (0, 0, 255), thick
-            )
-        cv2.imshow("Calibration", c)
-        if cv2.waitKey(1) == 27:
-            return False
-
-
-def run_9_point_calibration(gaze_estimator, camera_index=0):
-    """
-    Standard 9-point calibration
-    """
-    root = tk.Tk()
-    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
-    root.destroy()
-    cap = cv2.VideoCapture(camera_index)
-    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
-        cap.release()
-        cv2.destroyAllWindows()
-        return
-    mx, my = int(sw * 0.1), int(sh * 0.1)
-    gw, gh = sw - 2 * mx, sh - 2 * my
-    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2), (1, 0), (0, 1), (2, 1), (1, 2)]
-    pts = [(mx + int(c * (gw / 2)), my + int(r * (gh / 2))) for (r, c) in order]
-    feats, targs = [], []
-    pulse_d, cd_d = 1.0, 1.0
-    for cycle in range(1):
-        for x, y in pts:
-            ps = time.time()
-            final_radius = 20
-            while True:
-                e = time.time() - ps
-                if e > pulse_d:
-                    break
-                r, f = cap.read()
-                if not r:
-                    continue
-                c = np.zeros((sh, sw, 3), dtype=np.uint8)
-                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
-                final_radius = radius
-                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
-                cv2.imshow("Calibration", c)
-                if cv2.waitKey(1) == 27:
-                    cap.release()
-                    cv2.destroyAllWindows()
-                    return
-            cs = time.time()
-            while True:
-                e = time.time() - cs
-                if e > cd_d:
-                    break
-                r, f = cap.read()
-                if not r:
-                    continue
-                c = np.zeros((sh, sw, 3), dtype=np.uint8)
-                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
-                t = e / cd_d
-                ease = t * t * (3 - 2 * t)
-                ang = 360 * (1 - ease)
-                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
-                cv2.imshow("Calibration", c)
-                if cv2.waitKey(1) == 27:
-                    cap.release()
-                    cv2.destroyAllWindows()
-                    return
-                ft, blink = gaze_estimator.extract_features(f)
-                if ft is not None and not blink:
-                    feats.append(ft)
-                    targs.append([x, y])
-    cap.release()
-    cv2.destroyAllWindows()
-    if feats:
-        gaze_estimator.train(np.array(feats), np.array(targs))
-
-
-def run_5_point_calibration(gaze_estimator, camera_index=0):
-    """
-    Simpler 5-point calibration
-    """
-    root = tk.Tk()
-    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
-    root.destroy()
-    cap = cv2.VideoCapture(camera_index)
-    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
-        cap.release()
-        cv2.destroyAllWindows()
-        return
-    m = 100
-    # center, top-left, top-right, bottom-left, bottom-right
-    order = [(1, 1), (0, 0), (2, 0), (0, 2), (2, 2)]
-    pts = []
-    for r, c in order:
-        x = m if c == 0 else (sw - m if c == 2 else sw // 2)
-        y = m if r == 0 else (sh - m if r == 2 else sh // 2)
-        pts.append((x, y))
-    feats, targs = [], []
-    pd, cd = 1.0, 1.0
-    for cycle in range(1):
-        for x, y in pts:
-            ps = time.time()
-            final_radius = 20
-            while True:
-                e = time.time() - ps
-                if e > pd:
-                    break
-                r, f = cap.read()
-                if not r:
-                    continue
-                c = np.zeros((sh, sw, 3), dtype=np.uint8)
-                radius = 15 + int(15 * abs(np.sin(2 * np.pi * e)))
-                final_radius = radius
-                cv2.circle(c, (x, y), radius, (0, 255, 0), -1)
-                cv2.imshow("Calibration", c)
-                if cv2.waitKey(1) == 27:
-                    cap.release()
-                    cv2.destroyAllWindows()
-                    return
-            cs = time.time()
-            while True:
-                e = time.time() - cs
-                if e > cd:
-                    break
-                r, f = cap.read()
-                if not r:
-                    continue
-                c = np.zeros((sh, sw, 3), dtype=np.uint8)
-                cv2.circle(c, (x, y), final_radius, (0, 255, 0), -1)
-                t = e / cd
-                ease = t * t * (3 - 2 * t)
-                ang = 360 * (1 - ease)
-                cv2.ellipse(c, (x, y), (40, 40), 0, -90, -90 + ang, (255, 255, 255), 4)
-                cv2.imshow("Calibration", c)
-                if cv2.waitKey(1) == 27:
-                    cap.release()
-                    cv2.destroyAllWindows()
-                    return
-                ft, blink = gaze_estimator.extract_features(f)
-                if ft is not None and not blink:
-                    feats.append(ft)
-                    targs.append([x, y])
-    cap.release()
-    cv2.destroyAllWindows()
-    if feats:
-        gaze_estimator.train(np.array(feats), np.array(targs))
-
-
-def run_lissajous_calibration(gaze_estimator, camera_index=0):
-    """
-    Moves a calibration point in a Lissajous curve
-    """
-    root = tk.Tk()
-    sw, sh = root.winfo_screenwidth(), root.winfo_screenheight()
-    root.destroy()
-    cap = cv2.VideoCapture(camera_index)
-    if not wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, 2):
-        cap.release()
-        cv2.destroyAllWindows()
-        return
-    A, B, a, b, d = sw * 0.4, sh * 0.4, 3, 2, 0
-
-    def curve(t):
-        return (A * np.sin(a * t + d) + sw / 2, B * np.sin(b * t) + sh / 2)
-
-    tt = 5.0
-    fps = 60
-    frames = int(tt * fps)
-    feats, targs = [], []
-    vals = []
-    acc = 0
-
-    # Generate a time scale that speeds up / slows down sinusoidally
-    for i in range(frames):
-        frac = i / (frames - 1)
-        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
-        acc += spd / fps
-    end = acc
-    if end < 1e-6:
-        end = 1e-6
-    acc = 0
-
-    for i in range(frames):
-        frac = i / (frames - 1)
-        spd = 0.3 + 0.7 * np.sin(np.pi * frac)
-        acc += spd / fps
-        t = (acc / end) * (2 * np.pi)
-        ret, f = cap.read()
-        if not ret:
-            continue
-        x, y = curve(t)
-        c = np.zeros((sh, sw, 3), dtype=np.uint8)
-        cv2.circle(c, (int(x), int(y)), 20, (0, 255, 0), -1)
-        cv2.imshow("Calibration", c)
-        if cv2.waitKey(1) == 27:
-            break
-        ft, blink = gaze_estimator.extract_features(f)
-        if ft is not None and not blink:
-            feats.append(ft)
-            targs.append([x, y])
-
-    cap.release()
-    cv2.destroyAllWindows()
-    if feats:
-        gaze_estimator.train(np.array(feats), np.array(targs))
-
-
-def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=0):
-    """
-    Quick fine-tuning pass to adjust Kalman filter's measurementNoiseCov.
-    """
-    root = tk.Tk()
-    screen_width = root.winfo_screenwidth()
-    screen_height = root.winfo_screenheight()
-    root.destroy()
-
-    initial_points = [
-        {
-            "position": (screen_width // 2, screen_height // 4),
-            "start_time": None,
-            "data_collection_started": False,
-            "collection_start_time": None,
-            "collected_gaze": [],
-        },
-        {
-            "position": (screen_width // 4, 3 * screen_height // 4),
-            "start_time": None,
-            "data_collection_started": False,
-            "collection_start_time": None,
-            "collected_gaze": [],
-        },
-        {
-            "position": (3 * screen_width // 4, 3 * screen_height // 4),
-            "start_time": None,
-            "data_collection_started": False,
-            "collection_start_time": None,
-            "collected_gaze": [],
-        },
-    ]
-
-    points = initial_points.copy()
-    proximity_threshold = screen_width / 5
-    initial_delay = 0.5
-    data_collection_duration = 0.5
-
-    cv2.namedWindow("Fine Tuning", cv2.WND_PROP_FULLSCREEN)
-    cv2.setWindowProperty("Fine Tuning", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
-
-    cap = cv2.VideoCapture(camera_index)
-    gaze_positions = []
-
-    while len(points) > 0:
-        ret, frame = cap.read()
-        if not ret:
-            continue
-
-        features, blink_detected = gaze_estimator.extract_features(frame)
-        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
-
-        for point in points:
-            cv2.circle(canvas, point["position"], 20, (0, 255, 0), -1)
-
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        font_scale = 1.5
-        color = (255, 255, 255)
-        thickness = 2
-        text = "Look at the points until they disappear"
-        text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
-        text_x = (screen_width - text_size[0]) // 2
-        text_y = screen_height - 50
-        cv2.putText(canvas, text, (text_x, text_y), font, font_scale, color, thickness)
-
-        current_time = time.time()
-
-        if features is not None and not blink_detected:
-            X = np.array([features])
-            gaze_point = gaze_estimator.predict(X)[0]
-            gaze_x, gaze_y = int(gaze_point[0]), int(gaze_point[1])
-
-            cv2.circle(canvas, (gaze_x, gaze_y), 10, (255, 0, 0), -1)
-
-            for point in points[:]:
-                dx = gaze_x - point["position"][0]
-                dy = gaze_y - point["position"][1]
-                distance = np.sqrt(dx * dx + dy * dy)
-                if distance <= proximity_threshold:
-                    if point["start_time"] is None:
-                        point["start_time"] = current_time
-                        point["data_collection_started"] = False
-                        point["collection_start_time"] = None
-                        point["collected_gaze"] = []
-                    elapsed_time = current_time - point["start_time"]
-
-                    if (
-                        not point["data_collection_started"]
-                        and elapsed_time >= initial_delay
-                    ):
-                        point["data_collection_started"] = True
-                        point["collection_start_time"] = current_time
-                        point["collected_gaze"] = []
-
-                    if point["data_collection_started"]:
-                        data_collection_elapsed = (
-                            current_time - point["collection_start_time"]
-                        )
-                        point["collected_gaze"].append([gaze_x, gaze_y])
-                        shake_amplitude = int(
-                            5
-                            + (data_collection_elapsed / data_collection_duration) * 20
-                        )
-                        shake_x = int(
-                            np.random.uniform(-shake_amplitude, shake_amplitude)
-                        )
-                        shake_y = int(
-                            np.random.uniform(-shake_amplitude, shake_amplitude)
-                        )
-                        shaken_position = (
-                            point["position"][0] + shake_x,
-                            point["position"][1] + shake_y,
-                        )
-                        cv2.circle(canvas, shaken_position, 20, (0, 255, 0), -1)
-
-                        if data_collection_elapsed >= data_collection_duration:
-                            gaze_positions.extend(point["collected_gaze"])
-                            points.remove(point)
-                    else:
-                        cv2.circle(canvas, point["position"], 25, (0, 255, 255), 2)
-                else:
-                    point["start_time"] = None
-                    point["data_collection_started"] = False
-                    point["collection_start_time"] = None
-                    point["collected_gaze"] = []
-        else:
-            for point in points:
-                point["start_time"] = None
-                point["data_collection_started"] = False
-                point["collection_start_time"] = None
-                point["collected_gaze"] = []
-
-        cv2.imshow("Fine Tuning", canvas)
-        if cv2.waitKey(1) == 27:
-            cap.release()
-            cv2.destroyWindow("Fine Tuning")
-            return
-
-    cap.release()
-    cv2.destroyWindow("Fine Tuning")
-
-    gaze_positions = np.array(gaze_positions)
-    if gaze_positions.shape[0] < 2:
-        return
-
-    gaze_variance = np.var(gaze_positions, axis=0)
-    gaze_variance[gaze_variance == 0] = 1e-4
-    kalman.measurementNoiseCov = np.array(
-        [[gaze_variance[0], 0], [0, gaze_variance[1]]], dtype=np.float32
-    )
--- a/demo.py
+++ b/demo.py
@@ -1,272 +0,0 @@
-import cv2
-import numpy as np
-import tkinter as tk
-import time
-import argparse
-import os
-from gaze_estimator import GazeEstimator
-from calibration import (
-    run_9_point_calibration,
-    run_5_point_calibration,
-    run_lissajous_calibration,
-    fine_tune_kalman_filter,
-)
-from scipy.stats import gaussian_kde
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Gaze Estimation with Kalman Filter or KDE"
-    )
-    parser.add_argument(
-        "--filter",
-        choices=["kalman", "kde", "none"],
-        default="none",
-        help="Filter method: kalman, kde, or none",
-    )
-    parser.add_argument("--camera", type=int, default=0, help="Camera index")
-    parser.add_argument(
-        "--calibration",
-        choices=["9p", "5p", "lissajous"],
-        default="9p",
-        help="Choose calibration method (9p, 5p, or lissajous).",
-    )
-    parser.add_argument(
-        "--background", type=str, default=None, help="Path to background image"
-    )
-    parser.add_argument(
-        "--confidence",
-        type=float,
-        default=0.5,
-        help="Confidence interval for KDE contour (0 < value < 1)",
-    )
-    args = parser.parse_args()
-
-    filter_method = args.filter
-    camera_index = args.camera
-    calibration_method = args.calibration
-    background_path = args.background
-    confidence_level = args.confidence
-
-    gaze_estimator = GazeEstimator()
-
-    # Run the chosen calibration method (default 9p)
-    if calibration_method == "9p":
-        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
-    elif calibration_method == "5p":
-        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
-    else:
-        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)
-
-    if filter_method == "kalman":
-        kalman = cv2.KalmanFilter(4, 2)
-        kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
-        kalman.transitionMatrix = np.array(
-            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]],
-            np.float32,
-        )
-        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
-        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1
-        kalman.statePre = np.zeros((4, 1), np.float32)
-        kalman.statePost = np.zeros((4, 1), np.float32)
-
-        fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=camera_index)
-
-    root = tk.Tk()
-    screen_width = root.winfo_screenwidth()
-    screen_height = root.winfo_screenheight()
-    root.destroy()
-
-    cam_width, cam_height = 320, 240
-
-    if background_path and os.path.isfile(background_path):
-        background = cv2.imread(background_path)
-        background = cv2.resize(background, (screen_width, screen_height))
-    else:
-        background = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
-        background[:] = (50, 50, 50)
-
-    cv2.namedWindow("Gaze Estimation", cv2.WND_PROP_FULLSCREEN)
-    cv2.setWindowProperty(
-        "Gaze Estimation", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN
-    )
-
-    cap = cv2.VideoCapture(camera_index)
-    prev_time = time.time()
-
-    if filter_method == "kde":
-        gaze_history = []
-        time_window = 0.5  # seconds
-
-    # Variables for gaze cursor fade effect
-    cursor_alpha = 0.0
-    cursor_alpha_step = 0.05
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            continue
-
-        features, blink_detected = gaze_estimator.extract_features(frame)
-        if features is not None and not blink_detected:
-            X = np.array([features])
-            gaze_point = gaze_estimator.predict(X)[0]
-            x, y = int(gaze_point[0]), int(gaze_point[1])
-
-            if filter_method == "kalman":
-                prediction = kalman.predict()
-                x_pred = int(prediction[0][0])
-                y_pred = int(prediction[1][0])
-
-                # Clamp the predicted gaze point to the screen boundaries
-                x_pred = max(0, min(x_pred, screen_width - 1))
-                y_pred = max(0, min(y_pred, screen_height - 1))
-
-                measurement = np.array([[np.float32(x)], [np.float32(y)]])
-                if np.count_nonzero(kalman.statePre) == 0:
-                    kalman.statePre[:2] = measurement
-                    kalman.statePost[:2] = measurement
-                kalman.correct(measurement)
-
-            elif filter_method == "kde":
-                current_time = time.time()
-                gaze_history.append((current_time, x, y))
-
-                # Remove old entries
-                gaze_history = [
-                    (t, gx, gy)
-                    for (t, gx, gy) in gaze_history
-                    if current_time - t <= time_window
-                ]
-
-                if len(gaze_history) > 1:
-                    gaze_array = np.array([(gx, gy) for (t, gx, gy) in gaze_history])
-
-                    # Check for singular covariance
-                    try:
-                        kde = gaussian_kde(gaze_array.T)
-
-                        # Compute densities on a grid for visualization
-                        xi, yi = np.mgrid[0:screen_width:320j, 0:screen_height:200j]
-                        coords = np.vstack([xi.ravel(), yi.ravel()])
-                        zi = kde(coords).reshape(xi.shape).T
-
-                        # Find the contour level for the desired confidence interval
-                        levels = np.linspace(zi.min(), zi.max(), 100)
-                        zi_flat = zi.flatten()
-                        sorted_indices = np.argsort(zi_flat)[::-1]
-                        zi_sorted = zi_flat[sorted_indices]
-                        cumsum = np.cumsum(zi_sorted)
-                        cumsum /= cumsum[-1]  # Normalize to get CDF
-
-                        # Find the density threshold corresponding to the confidence level
-                        idx = np.searchsorted(cumsum, confidence_level)
-                        if idx >= len(zi_sorted):
-                            idx = len(zi_sorted) - 1
-                        threshold = zi_sorted[idx]
-
-                        # Create a binary mask where densities are above the threshold
-                        mask = np.where(zi >= threshold, 1, 0).astype(np.uint8)
-
-                        # Resize mask to screen dimensions
-                        mask_resized = cv2.resize(mask, (screen_width, screen_height))
-
-                        # Find contours in the binary mask
-                        contours, _ = cv2.findContours(
-                            mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
-                        )
-
-                        x_pred = int(np.mean(gaze_array[:, 0]))
-                        y_pred = int(np.mean(gaze_array[:, 1]))
-                    except np.linalg.LinAlgError:
-                        x_pred = int(np.mean(gaze_array[:, 0]))
-                        y_pred = int(np.mean(gaze_array[:, 1]))
-                        contours = []
-                else:
-                    x_pred, y_pred = x, y
-                    contours = []
-            # Increase cursor alpha for fade-in effect
-            elif filter_method == "none":
-                x_pred, y_pred = x, y
-                contours = []
-
-            cursor_alpha = min(cursor_alpha + cursor_alpha_step, 1.0)
-        else:
-            x_pred, y_pred = None, None
-            blink_detected = True
-            contours = []
-
-            # Decrease cursor alpha for fade-out effect
-            cursor_alpha = max(cursor_alpha - cursor_alpha_step, 0.0)
-
-        canvas = background.copy()
-
-        if filter_method == "kde" and features is not None and not blink_detected:
-            if len(gaze_history) > 1:
-                if "contours" in locals():
-                    cv2.drawContours(canvas, contours, -1, (15, 182, 242), thickness=5)
-
-        # Draw the gaze cursor with fade effect
-        if x_pred is not None and y_pred is not None and cursor_alpha > 0:
-            overlay = canvas.copy()
-            cv2.circle(overlay, (x_pred, y_pred), 30, (0, 0, 255), -1)
-            cv2.circle(overlay, (x_pred, y_pred), 25, (255, 255, 255), -1)
-            cv2.addWeighted(
-                overlay, cursor_alpha * 0.6, canvas, 1 - cursor_alpha * 0.6, 0, canvas
-            )
-
-        # Draw the camera feed
-        small_frame = cv2.resize(frame, (cam_width, cam_height))
-        frame_border = cv2.copyMakeBorder(
-            small_frame, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=(255, 255, 255)
-        )
-        x_offset = screen_width - cam_width - 20
-        y_offset = screen_height - cam_height - 20
-        canvas[
-            y_offset : y_offset + cam_height + 4, x_offset : x_offset + cam_width + 4
-        ] = frame_border
-
-        # FPS and blink indicator
-        current_time = time.time()
-        fps = 1 / (current_time - prev_time)
-        prev_time = current_time
-
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        font_scale = 1.2
-        font_color = (255, 255, 255)
-        font_thickness = 2
-
-        cv2.putText(
-            canvas,
-            f"FPS: {int(fps)}",
-            (50, 50),
-            font,
-            font_scale,
-            font_color,
-            font_thickness,
-            lineType=cv2.LINE_AA,
-        )
-
-        blink_text = "Blinking" if blink_detected else "Not Blinking"
-        blink_color = (0, 0, 255) if blink_detected else (0, 255, 0)
-        cv2.putText(
-            canvas,
-            blink_text,
-            (50, 100),
-            font,
-            font_scale,
-            blink_color,
-            font_thickness,
-            lineType=cv2.LINE_AA,
-        )
-
-        cv2.imshow("Gaze Estimation", canvas)
-        if cv2.waitKey(1) == 27:
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
-
-
-if __name__ == "__main__":
-    main()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,14 +1,44 @@
+[build-system]
+requires      = ["hatchling>=1.25"]
+build-backend = "hatchling.build"
+
 [project]
-name = "eyepy"
-version = "0.1.0"
-description = "EyePy is an eye tracking library easily implementable in your projects"
-readme = "README.md"
-requires-python = ">=3.12"
-dependencies = [
-  "mediapipe>=0.10.21",
-  "numpy>=1.26.4",
-  "opencv-python>=4.11.0.86",
-  "pyvirtualcam>=0.12.1",
-  "scikit-learn>=1.6.1",
-  "tk>=0.1.0",
+name            = "eyetrax"
+description     = "Webcam-based eye-tracking"
+readme          = "README.md"
+license         = { file = "LICENSE" }
+authors         = [{ name = "Chenkai Zhang (ck-zhang)" }]
+requires-python = ">=3.9"
+dynamic         = ["version"]
+dependencies    = [
+  "opencv-python>=4.5",
+  "mediapipe>=0.10",
+  "numpy>=1.22",
+  "scikit-learn>=1.3",
+  "scipy>=1.10",
+  "screeninfo>=0.8",
+  "pyvirtualcam>=0.10",
 ]
+
+classifiers = [
+  "License :: OSI Approved :: MIT License",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3 :: Only",
+  "Operating System :: OS Independent",
+]
+
+[project.urls]
+homepage = "https://github.com/ck-zhang/eyetrax"
+
+[project.scripts]
+eyetrax-demo       = "eyetrax.app.demo:run_demo"
+eyetrax-virtualcam = "eyetrax.app.virtualcam:run_virtualcam"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/eyetrax"]
+
+[tool.hatch.build]
+include = ["LICENSE"]
+
+[tool.hatch.version]
+path = "src/eyetrax/_version.py"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,35 +0,0 @@
-# This file was autogenerated by uv via the following command:
-#    uv export --no-hashes --format requirements-txt
-absl-py==2.1.0
-attrs==25.1.0
-cffi==1.17.1
-contourpy==1.3.1
-cycler==0.12.1
-flatbuffers==25.2.10
-fonttools==4.56.0
-jax==0.5.1
-jaxlib==0.5.1
-joblib==1.4.2
-kiwisolver==1.4.8
-matplotlib==3.10.0
-mediapipe==0.10.21
-ml-dtypes==0.4.1 ; python_full_version >= '3.13' or sys_platform != 'darwin'
-ml-dtypes==0.5.1 ; python_full_version < '3.13' and sys_platform == 'darwin'
-numpy==1.26.4
-opencv-contrib-python==4.11.0.86
-opencv-python==4.11.0.86
-opt-einsum==3.4.0
-packaging==24.2
-pillow==11.1.0
-protobuf==4.25.6
-pycparser==2.22
-pyparsing==3.2.1
-python-dateutil==2.9.0.post0
-pyvirtualcam==0.12.1
-scikit-learn==1.6.1
-scipy==1.15.2
-sentencepiece==0.2.0
-six==1.17.0
-sounddevice==0.5.1
-threadpoolctl==3.5.0
-tk==0.1.0
--- a/src/eyetrax/init.py
+++ b/src/eyetrax/init.py
@@ -0,0 +1,18 @@
+from ._version import __version__
+from .gaze import GazeEstimator
+
+from .calibration import (
+    run_9_point_calibration,
+    run_5_point_calibration,
+    run_lissajous_calibration,
+    fine_tune_kalman_filter,
+)
+
+__all__ = [
+    "__version__",
+    "GazeEstimator",
+    "run_9_point_calibration",
+    "run_5_point_calibration",
+    "run_lissajous_calibration",
+    "fine_tune_kalman_filter",
+]
--- a/src/eyetrax/_version.py
+++ b/src/eyetrax/_version.py
@@ -0,0 +1,2 @@
+__all__ = ["__version__"]
+__version__ = "0.2.0"
--- a/src/eyetrax/app/init.py
+++ b/src/eyetrax/app/init.py
--- a/src/eyetrax/app/demo.py
+++ b/src/eyetrax/app/demo.py
@@ -0,0 +1,213 @@
+import time
+import cv2
+import numpy as np
+import argparse
+import os
+from scipy.stats import gaussian_kde
+
+from eyetrax.utils.screen import get_screen_size
+from eyetrax.gaze import GazeEstimator
+from eyetrax.calibration import (
+    run_9_point_calibration,
+    run_5_point_calibration,
+    run_lissajous_calibration,
+    fine_tune_kalman_filter,
+)
+
+
+def run_demo():
+    """
+    Runs the interactive full-screen gaze estimation demo.
+
+    Parses the command-line options, calibrates a GazeEstimator with the
+    selected routine, optionally prepares a Kalman or KDE filter, then renders
+    the predicted gaze cursor, a webcam thumbnail, the FPS and the blink state
+    for every camera frame until ESC is pressed.
+    """
+    parser = argparse.ArgumentParser(
+        description="Gaze Estimation with Kalman Filter or KDE"
+    )
+    parser.add_argument("--filter", choices=["kalman", "kde", "none"], default="none")
+    parser.add_argument("--camera", type=int, default=0)
+    parser.add_argument(
+        "--calibration", choices=["9p", "5p", "lissajous"], default="9p"
+    )
+    parser.add_argument("--background", type=str, default=None)
+    parser.add_argument("--confidence", type=float, default=0.5, help="0 < value < 1")
+    args = parser.parse_args()
+
+    filter_method = args.filter
+    camera_index = args.camera
+    calibration_method = args.calibration
+    background_path = args.background
+    confidence_level = args.confidence
+
+    gaze_estimator = GazeEstimator()
+
+    if calibration_method == "9p":
+        run_9_point_calibration(gaze_estimator, camera_index=camera_index)
+    elif calibration_method == "5p":
+        run_5_point_calibration(gaze_estimator, camera_index=camera_index)
+    else:
+        run_lissajous_calibration(gaze_estimator, camera_index=camera_index)
+
+    if filter_method == "kalman":
+        # State is (x, y, vx, vy); only the position (x, y) is measured.
+        kalman = cv2.KalmanFilter(4, 2)
+        kalman.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
+        kalman.transitionMatrix = np.array(
+            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
+        )
+        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 50
+        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 0.2
+        kalman.statePre = np.zeros((4, 1), np.float32)
+        kalman.statePost = np.zeros((4, 1), np.float32)
+        fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=camera_index)
+    else:
+        kalman = None
+
+    screen_width, screen_height = get_screen_size()
+    cam_width, cam_height = 320, 240
+    BORDER = 2
+    MARGIN = 20
+
+    background = None
+    if background_path and os.path.isfile(background_path):
+        # imread yields None for files it cannot decode; fall back to the
+        # plain background below instead of crashing in resize.
+        background = cv2.imread(background_path)
+    if background is not None:
+        background = cv2.resize(background, (screen_width, screen_height))
+    else:
+        background = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
+        background[:] = (50, 50, 50)
+
+    cv2.namedWindow("Gaze Estimation", cv2.WND_PROP_FULLSCREEN)
+    cv2.setWindowProperty(
+        "Gaze Estimation", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN
+    )
+
+    cap = cv2.VideoCapture(camera_index)
+    prev_time = time.time()
+
+    if filter_method == "kde":
+        gaze_history = []
+        time_window = 0.5
+
+    cursor_alpha = 0.0
+    cursor_step = 0.05
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            # Keep polling the keyboard so ESC can still quit while the
+            # camera delivers no frames.
+            if cv2.waitKey(1) == 27:
+                break
+            continue
+
+        features, blink_detected = gaze_estimator.extract_features(frame)
+        if features is not None and not blink_detected:
+            gaze_point = gaze_estimator.predict(np.array([features]))[0]
+            x, y = map(int, gaze_point)
+
+            if kalman:
+                prediction = kalman.predict()
+                x_pred, y_pred = map(int, prediction[:2, 0])
+                x_pred = max(0, min(x_pred, screen_width - 1))
+                y_pred = max(0, min(y_pred, screen_height - 1))
+                measurement = np.array([[np.float32(x)], [np.float32(y)]])
+                # Seed the filter with the first measurement while its state
+                # is still all zeros.
+                if not np.any(kalman.statePre):
+                    kalman.statePre[:2] = measurement
+                    kalman.statePost[:2] = measurement
+                kalman.correct(measurement)
+            elif filter_method == "kde":
+                now = time.time()
+                gaze_history.append((now, x, y))
+                # Keep only the samples from the last time_window seconds.
+                gaze_history = [
+                    (t, gx, gy)
+                    for (t, gx, gy) in gaze_history
+                    if now - t <= time_window
+                ]
+                if len(gaze_history) > 1:
+                    arr = np.array([(gx, gy) for (_, gx, gy) in gaze_history])
+                    try:
+                        kde = gaussian_kde(arr.T)
+                        xi, yi = np.mgrid[0:screen_width:320j, 0:screen_height:200j]
+                        zi = (
+                            kde(np.vstack([xi.ravel(), yi.ravel()])).reshape(xi.shape).T
+                        )
+                        flat = zi.ravel()
+                        idx = np.argsort(flat)[::-1]
+                        cdf = np.cumsum(flat[idx]) / flat.sum()
+                        # searchsorted returns len(cdf) when confidence_level
+                        # exceeds every CDF value (e.g. 1.0 plus rounding),
+                        # so clamp to the last valid index.
+                        pos = min(
+                            int(np.searchsorted(cdf, confidence_level)),
+                            len(cdf) - 1,
+                        )
+                        threshold = flat[idx[pos]]
+                        mask = (zi >= threshold).astype(np.uint8)
+                        mask = cv2.resize(mask, (screen_width, screen_height))
+                        contours, _ = cv2.findContours(
+                            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+                        )
+                        x_pred = int(np.mean(arr[:, 0]))
+                        y_pred = int(np.mean(arr[:, 1]))
+                    except np.linalg.LinAlgError:
+                        x_pred, y_pred = x, y
+                        contours = []
+                else:
+                    x_pred, y_pred = x, y
+                    contours = []
+            else:
+                x_pred, y_pred = x, y
+                contours = []
+
+            cursor_alpha = min(cursor_alpha + cursor_step, 1.0)
+        else:
+            # No usable features: hide the cursor gradually and report the
+            # frame as a blink.
+            x_pred = y_pred = None
+            blink_detected = True
+            contours = []
+            cursor_alpha = max(cursor_alpha - cursor_step, 0.0)
+
+        canvas = background.copy()
+
+        if filter_method == "kde" and contours:
+            cv2.drawContours(canvas, contours, -1, (15, 182, 242), 5)
+
+        if x_pred is not None and y_pred is not None and cursor_alpha > 0:
+            overlay = canvas.copy()
+            cv2.circle(overlay, (x_pred, y_pred), 30, (0, 0, 255), -1)
+            cv2.circle(overlay, (x_pred, y_pred), 25, (255, 255, 255), -1)
+            cv2.addWeighted(
+                overlay, cursor_alpha * 0.6, canvas, 1 - cursor_alpha * 0.6, 0, canvas
+            )
+
+        # Bordered webcam thumbnail in the bottom-right corner.
+        small = cv2.resize(frame, (cam_width, cam_height))
+        thumb = cv2.copyMakeBorder(
+            small,
+            BORDER,
+            BORDER,
+            BORDER,
+            BORDER,
+            cv2.BORDER_CONSTANT,
+            value=(255, 255, 255),
+        )
+        h, w = thumb.shape[:2]
+        canvas[-h - MARGIN : -MARGIN, -w - MARGIN : -MARGIN] = thumb
+
+        now = time.time()
+        # Two consecutive readings can be equal on coarse clocks; avoid
+        # dividing by zero.
+        frame_dt = now - prev_time
+        fps = 1 / frame_dt if frame_dt > 0 else 0
+        prev_time = now
+
+        cv2.putText(
+            canvas,
+            f"FPS: {int(fps)}",
+            (50, 50),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1.2,
+            (255, 255, 255),
+            2,
+            cv2.LINE_AA,
+        )
+        blink_txt = "Blinking" if blink_detected else "Not Blinking"
+        blink_clr = (0, 0, 255) if blink_detected else (0, 255, 0)
+        cv2.putText(
+            canvas,
+            blink_txt,
+            (50, 100),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            1.2,
+            blink_clr,
+            2,
+            cv2.LINE_AA,
+        )
+
+        cv2.imshow("Gaze Estimation", canvas)
+        if cv2.waitKey(1) == 27:
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+
+if __name__ == "__main__":
+    run_demo()
--- a/src/eyetrax/app/virtualcam.py
+++ b/src/eyetrax/app/virtualcam.py
@@ -2,11 +2,12 @@ import argparse
 import time
 import cv2
 import numpy as np
-import tkinter as tk
 import pyvirtualcam
 from scipy.stats import gaussian_kde
-from gaze_estimator import GazeEstimator
-from calibration import (
+
+from eyetrax.utils.screen import get_screen_size
+from eyetrax.gaze import GazeEstimator
+from eyetrax.calibration import (
    run_9_point_calibration,
    run_5_point_calibration,
    run_lissajous_calibration,
@@ -14,10 +15,8 @@ from calibration import (
 )


-def main():
-    parser = argparse.ArgumentParser(
-        description="Virtual Camera Gaze Overlay (v4l2loopback)"
-    )
+def run_virtualcam():
+    parser = argparse.ArgumentParser(description="Virtual Camera Gaze Overlay")
    parser.add_argument("--filter", choices=["kalman", "kde", "none"], default="none")
    parser.add_argument("--camera", type=int, default=0)
    parser.add_argument(
@@ -46,16 +45,13 @@ def main():
        kalman.transitionMatrix = np.array(
            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
        )
-        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 10
-        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1
+        kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 50
+        kalman.measurementNoiseCov = np.eye(2, dtype=np.float32) * 0.2
        kalman.statePre = np.zeros((4, 1), np.float32)
        kalman.statePost = np.zeros((4, 1), np.float32)
        fine_tune_kalman_filter(gaze_estimator, kalman, camera_index=camera_index)

-    root = tk.Tk()
-    screen_width = root.winfo_screenwidth()
-    screen_height = root.winfo_screenheight()
-    root.destroy()
+    screen_width, screen_height = get_screen_size()

    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
@@ -71,15 +67,15 @@ def main():

    gaze_history = []
    time_window = 0.5
-    prev_time = time.time()
-    mask_prev = None
-    mask_next = None
+    mask_prev = mask_next = None
    blend_alpha = 1.0
    contours_cache = []
-    last_kde_x_pred = None
-    last_kde_y_pred = None
+    last_kde_x_pred = last_kde_y_pred = None
    frame_count = 0

+    BORDER = 2
+    MARGIN = 20
+
    with pyvirtualcam.Camera(
        width=screen_width,
        height=screen_height,
@@ -94,49 +90,48 @@ def main():
                continue

            features, blink_detected = gaze_estimator.extract_features(frame)
-            x_pred, y_pred = None, None
+            x_pred = y_pred = None

            if features is not None and not blink_detected:
                gaze_point = gaze_estimator.predict(np.array([features]))[0]
-                x, y = int(gaze_point[0]), int(gaze_point[1])
+                x, y = map(int, gaze_point)

                if kalman and filter_method == "kalman":
                    prediction = kalman.predict()
-                    x_pred = int(prediction[0][0])
-                    y_pred = int(prediction[1][0])
+                    x_pred, y_pred = map(int, prediction[:2, 0])
                    x_pred = max(0, min(x_pred, screen_width - 1))
                    y_pred = max(0, min(y_pred, screen_height - 1))
                    measurement = np.array([[np.float32(x)], [np.float32(y)]])
-                    if np.count_nonzero(kalman.statePre) == 0:
+                    if not np.any(kalman.statePre):
                        kalman.statePre[:2] = measurement
                        kalman.statePost[:2] = measurement
                    kalman.correct(measurement)

                elif filter_method == "kde":
-                    current_time = time.time()
-                    gaze_history.append((current_time, x, y))
+                    now = time.time()
+                    gaze_history.append((now, x, y))
                    gaze_history = [
                        (t, gx, gy)
                        for (t, gx, gy) in gaze_history
-                        if current_time - t <= time_window
+                        if now - t <= time_window
                    ]
                    if len(gaze_history) > 1 and frame_count % 5 == 0:
-                        arr = np.array([[gx, gy] for (_, gx, gy) in gaze_history])
+                        arr = np.array([(gx, gy) for (_, gx, gy) in gaze_history])
                        try:
                            kde = gaussian_kde(arr.T)
                            xi, yi = np.mgrid[0:screen_width:200j, 0:screen_height:120j]
-                            coords = np.vstack([xi.ravel(), yi.ravel()])
-                            zi = kde(coords).reshape(xi.shape).T
-                            zi_flat = zi.flatten()
-                            sort_idx = np.argsort(zi_flat)[::-1]
-                            zi_sorted = zi_flat[sort_idx]
-                            cumsum = np.cumsum(zi_sorted)
-                            cumsum /= cumsum[-1]
-                            idx = np.searchsorted(cumsum, confidence_level)
-                            if idx >= len(zi_sorted):
-                                idx = len(zi_sorted) - 1
-                            threshold = zi_sorted[idx]
-                            mask_new = np.where(zi >= threshold, 1, 0).astype(np.uint8)
+                            zi = (
+                                kde(np.vstack([xi.ravel(), yi.ravel()]))
+                                .reshape(xi.shape)
+                                .T
+                            )
+                            flat = zi.ravel()
+                            idx = np.argsort(flat)[::-1]
+                            cdf = np.cumsum(flat[idx]) / flat.sum()
+                            threshold = flat[
+                                idx[np.searchsorted(cdf, confidence_level)]
+                            ]
+                            mask_new = (zi >= threshold).astype(np.uint8)
                            mask_new = cv2.resize(
                                mask_new, (screen_width, screen_height)
                            )
@@ -169,7 +164,7 @@ def main():
                and mask_next is not None
            ):
                blend_alpha = min(blend_alpha + 0.2, 1.0)
-                blended_mask = cv2.addWeighted(
+                blended = cv2.addWeighted(
                    mask_prev.astype(np.float32),
                    1.0 - blend_alpha,
                    mask_next.astype(np.float32),
@@ -177,10 +172,10 @@ def main():
                    0,
                ).astype(np.uint8)
                kernel2 = np.ones((5, 5), np.uint8)
-                blended_mask = cv2.morphologyEx(blended_mask, cv2.MORPH_OPEN, kernel2)
-                blended_mask = cv2.morphologyEx(blended_mask, cv2.MORPH_CLOSE, kernel2)
+                blended = cv2.morphologyEx(blended, cv2.MORPH_OPEN, kernel2)
+                blended = cv2.morphologyEx(blended, cv2.MORPH_CLOSE, kernel2)
                contours, _ = cv2.findContours(
-                    blended_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS
+                    blended, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS
                )
                contours_cache = contours
                if x_pred is not None and y_pred is not None:
@@ -192,6 +187,19 @@ def main():
            if filter_method != "kde" and x_pred is not None and y_pred is not None:
                cv2.circle(output, (x_pred, y_pred), 10, (0, 0, 255), -1)

+            small = cv2.resize(frame, (cam_width, cam_height))
+            thumb = cv2.copyMakeBorder(
+                small,
+                BORDER,
+                BORDER,
+                BORDER,
+                BORDER,
+                cv2.BORDER_CONSTANT,
+                value=(255, 255, 255),
+            )
+            h, w = thumb.shape[:2]
+            output[-h - MARGIN : -MARGIN, -w - MARGIN : -MARGIN] = thumb
+
            cam.send(output)
            cam.sleep_until_next_frame()
            frame_count += 1
@@ -201,4 +209,4 @@ def main():


 if __name__ == "__main__":
-    main()
+    run_virtualcam()
--- a/src/eyetrax/calibration/__init__.py
+++ b/src/eyetrax/calibration/__init__.py
@@ -0,0 +1,13 @@
+from .common import wait_for_face_and_countdown
+from .nine_point import run_9_point_calibration
+from .five_point import run_5_point_calibration
+from .lissajous import run_lissajous_calibration
+from .fine_tune import fine_tune_kalman_filter
+
+__all__ = [
+    "wait_for_face_and_countdown",
+    "run_9_point_calibration",
+    "run_5_point_calibration",
+    "run_lissajous_calibration",
+    "fine_tune_kalman_filter",
+]
--- a/src/eyetrax/calibration/common.py
+++ b/src/eyetrax/calibration/common.py
@@ -0,0 +1,56 @@
+import time
+import cv2
+import numpy as np
+
+
+def wait_for_face_and_countdown(cap, gaze_estimator, sw, sh, dur: int = 2) -> bool:
+    """
+    Waits for a face to be detected (not blinking), then shows a countdown ellipse.
+
+    The countdown restarts whenever the face is lost or a blink is reported.
+
+    Args:
+        cap: Capture object whose read() returns (ok, frame).
+        gaze_estimator: Object whose extract_features(frame) returns
+            (features, blink).
+        sw: Width of the drawing canvas in pixels.
+        sh: Height of the drawing canvas in pixels.
+        dur: Seconds the face must stay detected before returning.
+
+    Returns:
+        True once a face has been detected continuously for dur seconds,
+        False if ESC was pressed first.
+    """
+    cv2.namedWindow("Calibration", cv2.WND_PROP_FULLSCREEN)
+    cv2.setWindowProperty("Calibration", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
+    fd_start = None
+    countdown = False
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            # Keep polling the keyboard so ESC can still abort while the
+            # camera delivers no frames.
+            if cv2.waitKey(1) == 27:
+                return False
+            continue
+        f, blink = gaze_estimator.extract_features(frame)
+        face = f is not None and not blink
+        canvas = np.zeros((sh, sw, 3), dtype=np.uint8)
+        now = time.time()
+        if face:
+            if not countdown:
+                fd_start = now
+                countdown = True
+            elapsed = now - fd_start
+            if elapsed >= dur:
+                return True
+            # Smoothstep easing shrinks the filled sector from 360 to 0 degrees.
+            t = elapsed / dur
+            e = t * t * (3 - 2 * t)
+            ang = 360 * (1 - e)
+            cv2.ellipse(
+                canvas,
+                (sw // 2, sh // 2),
+                (50, 50),
+                0,
+                -90,
+                -90 + ang,
+                (0, 255, 0),
+                -1,
+            )
+        else:
+            countdown = False
+            fd_start = None
+            txt = "Face not detected"
+            fs = 2
+            thick = 3
+            size, _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, fs, thick)
+            # Centre the message on the canvas.
+            tx = (sw - size[0]) // 2
+            ty = (sh + size[1]) // 2
+            cv2.putText(
+                canvas, txt, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, fs, (0, 0, 255), thick
+            )
+        cv2.imshow("Calibration", canvas)
+        if cv2.waitKey(1) == 27:
+            return False
--- a/src/eyetrax/calibration/fine_tune.py
+++ b/src/eyetrax/calibration/fine_tune.py
@@ -0,0 +1,125 @@
+import time
+import cv2
+import numpy as np
+
+from eyetrax.utils.screen import get_screen_size
+
+
+def fine_tune_kalman_filter(gaze_estimator, kalman, camera_index: int = 0):
+    """
+    Quick fine-tuning pass to adjust Kalman filter's measurementNoiseCov.
+
+    Shows three target points full screen. Once the predicted gaze has stayed
+    near a point for initial_delay seconds, gaze predictions are recorded for
+    data_collection_duration seconds and the point is removed. When every
+    point is gone, the per-axis variance of the recorded predictions becomes
+    the diagonal of kalman.measurementNoiseCov.
+
+    Returns early, leaving kalman untouched, if ESC is pressed or fewer than
+    two gaze samples were recorded.
+    """
+    screen_width, screen_height = get_screen_size()
+
+    # Targets: top centre, bottom left, bottom right.
+    points_tpl = [
+        (screen_width // 2, screen_height // 4),
+        (screen_width // 4, 3 * screen_height // 4),
+        (3 * screen_width // 4, 3 * screen_height // 4),
+    ]
+
+    # Per-target progress record.
+    points = [
+        dict(
+            position=pos,
+            start_time=None,
+            data_collection_started=False,
+            collection_start_time=None,
+            collected_gaze=[],
+        )
+        for pos in points_tpl
+    ]
+
+    # Gaze counts as "on" a target within this many pixels of it.
+    proximity_threshold = screen_width / 5
+    # Seconds of dwell before recording starts, and seconds of recording.
+    initial_delay = 0.5
+    data_collection_duration = 0.5
+
+    cv2.namedWindow("Fine Tuning", cv2.WND_PROP_FULLSCREEN)
+    cv2.setWindowProperty("Fine Tuning", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
+
+    cap = cv2.VideoCapture(camera_index)
+    gaze_positions = []
+
+    # Loop until every target has been completed and removed.
+    while points:
+        ret, frame = cap.read()
+        if not ret:
+            continue
+
+        features, blink_detected = gaze_estimator.extract_features(frame)
+        canvas = np.zeros((screen_height, screen_width, 3), dtype=np.uint8)
+
+        for point in points:
+            cv2.circle(canvas, point["position"], 20, (0, 255, 0), -1)
+
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        text = "Look at the points until they disappear"
+        size, _ = cv2.getTextSize(text, font, 1.5, 2)
+        cv2.putText(
+            canvas,
+            text,
+            ((screen_width - size[0]) // 2, screen_height - 50),
+            font,
+            1.5,
+            (255, 255, 255),
+            2,
+        )
+
+        now = time.time()
+
+        if features is not None and not blink_detected:
+            gaze_point = gaze_estimator.predict(np.array([features]))[0]
+            gaze_x, gaze_y = map(int, gaze_point)
+            cv2.circle(canvas, (gaze_x, gaze_y), 10, (255, 0, 0), -1)
+
+            # Iterate over a copy because completed points are removed below.
+            for point in points[:]:
+                dx, dy = gaze_x - point["position"][0], gaze_y - point["position"][1]
+                if np.hypot(dx, dy) <= proximity_threshold:
+                    if point["start_time"] is None:
+                        point["start_time"] = now
+                    elapsed = now - point["start_time"]
+
+                    if (
+                        not point["data_collection_started"]
+                        and elapsed >= initial_delay
+                    ):
+                        point["data_collection_started"] = True
+                        point["collection_start_time"] = now
+
+                    if point["data_collection_started"]:
+                        data_elapsed = now - point["collection_start_time"]
+                        point["collected_gaze"].append([gaze_x, gaze_y])
+                        # Jitter the target with an amplitude that grows
+                        # while samples are being recorded.
+                        shake = int(5 + (data_elapsed / data_collection_duration) * 20)
+                        shaken = (
+                            point["position"][0]
+                            + int(np.random.uniform(-shake, shake)),
+                            point["position"][1]
+                            + int(np.random.uniform(-shake, shake)),
+                        )
+                        cv2.circle(canvas, shaken, 20, (0, 255, 0), -1)
+                        if data_elapsed >= data_collection_duration:
+                            gaze_positions.extend(point["collected_gaze"])
+                            points.remove(point)
+                    else:
+                        # Dwell phase: outline the target being looked at.
+                        cv2.circle(canvas, point["position"], 25, (0, 255, 255), 2)
+                else:
+                    # Gaze left the target: discard its progress and samples.
+                    point.update(
+                        start_time=None,
+                        data_collection_started=False,
+                        collection_start_time=None,
+                        collected_gaze=[],
+                    )
+
+        cv2.imshow("Fine Tuning", canvas)
+        if cv2.waitKey(1) == 27:
+            cap.release()
+            cv2.destroyWindow("Fine Tuning")
+            return
+
+    cap.release()
+    cv2.destroyWindow("Fine Tuning")
+
+    gaze_positions = np.array(gaze_positions)
+    if gaze_positions.shape[0] < 2:
+        return
+
+    # NOTE(review): the variance is taken over samples pooled from all three
+    # targets, so it also includes the spread between target positions;
+    # confirm this is intended rather than a per-target variance.
+    var = np.var(gaze_positions, axis=0)
+    # Avoid a zero entry on the noise covariance diagonal.
+    var[var == 0] = 1e-4
+    kalman.measurementNoiseCov = np.array([[var[0], 0], [0, var[1]]], dtype=np.float32)
--- a/src/eyetrax/calibration/five_point.py
+++ b/src/eyetrax/calibration/five_point.py
@@ -0,0 +1,77 @@
+import time
+import cv2
+import numpy as np
+
+from eyetrax.utils.screen import get_screen_size
+from eyetrax.calibration.common import wait_for_face_and_countdown
+
+
+def run_5_point_calibration(gaze_estimator, camera_index: int = 0):
+    """
+    Faster five-point calibration
+
+    Shows the screen centre and the four corners of a grid inset 10% from the
+    screen edges. Each target pulses for one second, then a one-second
+    countdown ring is drawn while features are collected. The estimator is
+    trained on the collected samples unless ESC aborts the run.
+    """
+    screen_w, screen_h = get_screen_size()
+    camera = cv2.VideoCapture(camera_index)
+
+    def shutdown():
+        camera.release()
+        cv2.destroyAllWindows()
+
+    if not wait_for_face_and_countdown(camera, gaze_estimator, screen_w, screen_h, 2):
+        shutdown()
+        return
+
+    margin_x = int(screen_w * 0.1)
+    margin_y = int(screen_h * 0.1)
+    half_w = (screen_w - 2 * margin_x) / 2
+    half_h = (screen_h - 2 * margin_y) / 2
+
+    # (row, col) cells of the 3x3 grid, in display order.
+    grid_cells = ((1, 1), (0, 0), (2, 0), (0, 2), (2, 2))
+    targets = []
+    for row, col in grid_cells:
+        targets.append((margin_x + int(col * half_w), margin_y + int(row * half_h)))
+
+    pulse_seconds = 1.0
+    countdown_seconds = 1.0
+    samples, labels = [], []
+
+    for px, py in targets:
+        # Phase 1: pulsing dot to attract the gaze; nothing is recorded.
+        dot_radius = 20
+        pulse_began = time.time()
+        while (elapsed := time.time() - pulse_began) <= pulse_seconds:
+            ok, frame = camera.read()
+            if not ok:
+                continue
+            dot_radius = 15 + int(15 * abs(np.sin(2 * np.pi * elapsed)))
+            canvas = np.zeros((screen_h, screen_w, 3), dtype=np.uint8)
+            cv2.circle(canvas, (px, py), dot_radius, (0, 255, 0), -1)
+            cv2.imshow("Calibration", canvas)
+            if cv2.waitKey(1) == 27:
+                shutdown()
+                return
+
+        # Phase 2: dot frozen at its last radius with a shrinking ring;
+        # features are recorded for every non-blink frame.
+        countdown_began = time.time()
+        while (elapsed := time.time() - countdown_began) <= countdown_seconds:
+            ok, frame = camera.read()
+            if not ok:
+                continue
+            progress = elapsed / countdown_seconds
+            eased = progress * progress * (3 - 2 * progress)
+            sweep = 360 * (1 - eased)
+            canvas = np.zeros((screen_h, screen_w, 3), dtype=np.uint8)
+            cv2.circle(canvas, (px, py), dot_radius, (0, 255, 0), -1)
+            cv2.ellipse(
+                canvas, (px, py), (40, 40), 0, -90, -90 + sweep, (255, 255, 255), 4
+            )
+            cv2.imshow("Calibration", canvas)
+            if cv2.waitKey(1) == 27:
+                shutdown()
+                return
+            features, blinking = gaze_estimator.extract_features(frame)
+            if features is None or blinking:
+                continue
+            samples.append(features)
+            labels.append([px, py])
+
+    shutdown()
+    if samples:
+        gaze_estimator.train(np.array(samples), np.array(labels))
--- a/src/eyetrax/calibration/lissajous.py
+++ b/src/eyetrax/calibration/lissajous.py
@@ -0,0 +1,61 @@
+import time
+import cv2
+import numpy as np
+
+from eyetrax.utils.screen import get_screen_size
+from eyetrax.calibration.common import wait_for_face_and_countdown
+
+
+def run_lissajous_calibration(gaze_estimator, camera_index: int = 0):
+    """
+    Moves a calibration point along a Lissajous curve
+
+    The dot makes one pass over the curve in a fixed number of frames, moving
+    slower at the start and end than in the middle. Features from every
+    non-blink frame are paired with the dot position and used to train the
+    estimator. ESC stops the sweep early; samples gathered so far are still
+    used.
+    """
+    screen_w, screen_h = get_screen_size()
+
+    camera = cv2.VideoCapture(camera_index)
+    if not wait_for_face_and_countdown(camera, gaze_estimator, screen_w, screen_h, 2):
+        camera.release()
+        cv2.destroyAllWindows()
+        return
+
+    amp_x = screen_w * 0.4
+    amp_y = screen_h * 0.4
+    freq_x, freq_y, phase = 3, 2, 0
+
+    duration = 5.0
+    rate = 60
+    n_frames = int(duration * rate)
+
+    def step(index):
+        # Curve-parameter increment for one frame: speed ramps from 0.3 up to
+        # 1.0 mid-sweep and back down to 0.3.
+        progress = index / (n_frames - 1)
+        return (0.3 + 0.7 * np.sin(np.pi * progress)) / rate
+
+    # First pass: total distance, used to normalise the sweep to one period.
+    travelled = 0
+    for index in range(n_frames):
+        travelled += step(index)
+    full_path = travelled if travelled >= 1e-6 else 1e-6
+
+    samples, labels = [], []
+    travelled = 0
+    for index in range(n_frames):
+        travelled += step(index)
+        ok, frame = camera.read()
+        if not ok:
+            continue
+        theta = (travelled / full_path) * (2 * np.pi)
+        dot_x = amp_x * np.sin(freq_x * theta + phase) + screen_w / 2
+        dot_y = amp_y * np.sin(freq_y * theta) + screen_h / 2
+        canvas = np.zeros((screen_h, screen_w, 3), dtype=np.uint8)
+        cv2.circle(canvas, (int(dot_x), int(dot_y)), 20, (0, 255, 0), -1)
+        cv2.imshow("Calibration", canvas)
+        if cv2.waitKey(1) == 27:
+            break
+        features, blinking = gaze_estimator.extract_features(frame)
+        if features is None or blinking:
+            continue
+        samples.append(features)
+        labels.append([dot_x, dot_y])
+
+    camera.release()
+    cv2.destroyAllWindows()
+    if samples:
+        gaze_estimator.train(np.array(samples), np.array(labels))
--- a/src/eyetrax/calibration/nine_point.py
+++ b/src/eyetrax/calibration/nine_point.py
@@ -0,0 +1,77 @@
+import time
+import cv2
+import numpy as np
+
+from eyetrax.utils.screen import get_screen_size
+from eyetrax.calibration.common import wait_for_face_and_countdown
+
+
+def run_9_point_calibration(gaze_estimator, camera_index: int = 0):
+    """
+    Standard nine-point calibration
+
+    Visits all nine cells of a 3x3 grid inset 10% from the screen edges:
+    centre first, then the four corners, then the four edge midpoints. Each
+    target pulses for one second, then a one-second countdown ring is drawn
+    while features are collected. The estimator is trained on the collected
+    samples unless ESC aborts the run.
+    """
+    width, height = get_screen_size()
+    capture = cv2.VideoCapture(camera_index)
+
+    def close_all():
+        capture.release()
+        cv2.destroyAllWindows()
+
+    def show(image):
+        # Displays the frame and reports whether ESC was pressed.
+        cv2.imshow("Calibration", image)
+        return cv2.waitKey(1) == 27
+
+    def blank():
+        return np.zeros((height, width, 3), dtype=np.uint8)
+
+    if not wait_for_face_and_countdown(capture, gaze_estimator, width, height, 2):
+        close_all()
+        return
+
+    inset_x, inset_y = int(width * 0.1), int(height * 0.1)
+    step_x = (width - 2 * inset_x) / 2
+    step_y = (height - 2 * inset_y) / 2
+    # (row, col) cells in display order.
+    visit_order = [
+        (1, 1),
+        (0, 0),
+        (2, 0),
+        (0, 2),
+        (2, 2),
+        (1, 0),
+        (0, 1),
+        (2, 1),
+        (1, 2),
+    ]
+    targets = [
+        (inset_x + int(col * step_x), inset_y + int(row * step_y))
+        for row, col in visit_order
+    ]
+
+    pulse_len, ring_len = 1.0, 1.0
+    collected_features, collected_targets = [], []
+
+    for target in targets:
+        # Pulse phase: attract the gaze, nothing is recorded.
+        held_radius = 20
+        began = time.time()
+        while True:
+            dt = time.time() - began
+            if dt > pulse_len:
+                break
+            grabbed, image = capture.read()
+            if not grabbed:
+                continue
+            held_radius = 15 + int(15 * abs(np.sin(2 * np.pi * dt)))
+            screen = blank()
+            cv2.circle(screen, target, held_radius, (0, 255, 0), -1)
+            if show(screen):
+                close_all()
+                return
+
+        # Countdown phase: dot frozen at its last radius, ring shrinks, and
+        # features are recorded for every non-blink frame.
+        began = time.time()
+        while True:
+            dt = time.time() - began
+            if dt > ring_len:
+                break
+            grabbed, image = capture.read()
+            if not grabbed:
+                continue
+            screen = blank()
+            cv2.circle(screen, target, held_radius, (0, 255, 0), -1)
+            u = dt / ring_len
+            arc = 360 * (1 - u * u * (3 - 2 * u))
+            cv2.ellipse(screen, target, (40, 40), 0, -90, -90 + arc, (255, 255, 255), 4)
+            if show(screen):
+                close_all()
+                return
+            feature_vec, is_blink = gaze_estimator.extract_features(image)
+            if feature_vec is not None and not is_blink:
+                collected_features.append(feature_vec)
+                collected_targets.append([target[0], target[1]])
+
+    close_all()
+    if collected_features:
+        gaze_estimator.train(np.array(collected_features), np.array(collected_targets))
--- a/src/eyetrax/gaze.py
+++ b/src/eyetrax/gaze.py
@@ -63,15 +63,15 @@ class GazeEstimator:
        ]

        mutual_indices = [
-            4,  # Nose
+            4,   # Nose
            10,  # Very top
-            151,  # Forehead
-            9,  # Between brow
-            152,  # Chin
-            234,  # Very left
-            454,  # Very right
+            151, # Forehead
+            9,   # Between brow
+            152, # Chin
+            234, # Very left
+            454, # Very right
            58,  # Left jaw
-            288,  # Right jaw
+            288, # Right jaw
        ]
        # fmt: on

@@ -139,7 +139,6 @@ class GazeEstimator:
        Trains gaze prediction model
        """
        self.variable_scaling = variable_scaling
-
        X_scaled = self.scaler.fit_transform(X)
        if self.variable_scaling is not None:
            X_scaled *= self.variable_scaling
--- a/src/eyetrax/utils/screen.py
+++ b/src/eyetrax/utils/screen.py
@@ -0,0 +1,6 @@
+from screeninfo import get_monitors
+
+
+def get_screen_size():
+    """
+    Returns the (width, height) of the primary monitor.
+
+    Falls back to the first monitor reported by get_monitors() when none is
+    flagged as primary. Raises IndexError when no monitor is reported at all.
+    """
+    monitors = get_monitors()
+    # The first enumerated monitor is not necessarily the primary one, so
+    # prefer the monitor flagged as primary. getattr keeps this working if a
+    # monitor object lacks the is_primary attribute.
+    chosen = next(
+        (m for m in monitors if getattr(m, "is_primary", False)),
+        monitors[0],
+    )
+    return chosen.width, chosen.height
--- a/uv.lock
+++ b/uv.lock
				`@@ -1 +0,0 @@`
				`shape_predictor_68_face_landmarks.dat filter=lfs diff=lfs merge=lfs -text`
@@ -1 +1 @@
 .12
 >=3.9