From c8b3ce1e26bedab04dc1c981285e66630e9e2123 Mon Sep 17 00:00:00 2001
From: Dillon DuPont
Date: Tue, 13 May 2025 12:50:53 -0400
Subject: [PATCH] fixed hitbox logic

---
 notebooks/diorama/diorama.py | 129 +++++++++++++++++++++++++++++++++++++++++--
 notebooks/diorama/draw.py    |  10 ++-
 2 files changed, 132 insertions(+), 7 deletions(-)

diff --git a/notebooks/diorama/diorama.py b/notebooks/diorama/diorama.py
index 5b0a41b3..b0234b03 100644
--- a/notebooks/diorama/diorama.py
+++ b/notebooks/diorama/diorama.py
@@ -73,6 +73,7 @@ class Diorama:
     class Interface:
         def __init__(self, diorama):
             self._diorama = diorama
+            self.hitboxes = []
 
         async def screenshot(self, save_to_disk=False, output_dir=None, take_focus=True):
             Diorama._ensure_scheduler()
@@ -89,13 +90,131 @@ class Diorama:
                 },
                 "future": future
             })
-            return await future
+            # The awaited future is expected to resolve to a (result, image) pair.
+            result, img = await future
+            # Cache the hitboxes so the coordinate converters below can reuse them
+            # without taking another screenshot.
+            self.hitboxes = result.get("hitboxes", [])
+            return result, img
+
+        async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
+            """
+            Convert screenshot-relative coordinates (x, y) to absolute screen coordinates.
+
+            (x, y) are treated as fractions of the width and height of each cached
+            entry's 'hitbox' rect, which is read as [x0, y0, x1, y1]. The first rect
+            that contains the mapped point (abs_x, abs_y) wins.
+            If none is found, the input is returned unchanged.
+
+            Takes a screenshot first when no hitboxes are cached.
+            """
+            if not self.hitboxes:
+                await self.screenshot()  # populates self.hitboxes
+            for h in self.hitboxes:
+                rect = h.get("hitbox")
+                # Skip entries without a well-formed [x0, y0, x1, y1] rect.
+                if not rect or len(rect) != 4:
+                    continue
+                x0, y0, x1, y1 = rect
+                width = x1 - x0
+                height = y1 - y0
+                abs_x = x0 + x * width
+                abs_y = y0 + y * height
+                # NOTE(review): for 0 <= x, y <= 1 and a non-inverted rect this
+                # test always passes, so the first well-formed hitbox wins; it
+                # only rejects out-of-range input. Confirm that is intended.
+                if x0 <= abs_x <= x1 and y0 <= abs_y <= y1:
+                    return abs_x, abs_y
+            return x, y
+
+        async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
+            """
+            Convert absolute screen coordinates (x, y) to screenshot-relative
+            coordinates (normalized to [0, 1]).
+
+            Uses the first cached entry whose 'target' rect, read as
+            [x0, y0, x1, y1], contains (x, y). If none is found, the input is
+            returned unchanged.
+
+            Takes a screenshot first when no hitboxes are cached.
+            """
+            if not self.hitboxes:
+                await self.screenshot()  # populates self.hitboxes
+            for h in self.hitboxes:
+                rect = h.get("target")
+                # Skip entries without a well-formed [x0, y0, x1, y1] rect.
+                if not rect or len(rect) != 4:
+                    continue
+                x0, y0, x1, y1 = rect
+                width = x1 - x0
+                height = y1 - y0
+                if x0 <= x <= x1 and y0 <= y <= y1:
+                    # A zero-sized rect would divide by zero; report 0.0 instead.
+                    rel_x = (x - x0) / width if width else 0.0
+                    rel_y = (y - y0) / height if height else 0.0
+                    return rel_x, rel_y
+            return x, y
 
 async def main():
+    """Save a side-by-side demo image linking a screen point to its screenshot point."""
+    from PIL import Image, ImageDraw
+    from draw import capture_all_apps
+
     desktop1 = Diorama.create_from_apps(["Discord", "Notes"])
-    desktop2 = Diorama.create_from_apps(["Google Chrome"])
-    await desktop1.interface.screenshot()
-    await desktop2.interface.screenshot()
-    
+
+    # Take full screen screenshot (no app whitelist)
+    _, img_full = capture_all_apps()
+    # Take desktop1 screenshot
+    _, img1 = await desktop1.interface.screenshot()
+
+    # Pick a sample normalized screenshot coordinate
+    test_screenshot_coord = (0.5, 0.5)  # center
+    # Convert to screen coordinates using desktop1 (should map to full screenshot)
+    screen_coord = await desktop1.interface.to_screen_coordinates(*test_screenshot_coord)
+    # Convert back to screenshot coordinates on desktop1
+    screenshot_coord_back = await desktop1.interface.to_screenshot_coordinates(*screen_coord)
+
+    # Draw on full screenshot: the mapped screen coordinate
+    img_full = img_full.convert("RGBA")
+    img1 = img1.convert("RGBA")
+    width1, height1 = img1.size
+    x_screen, y_screen = int(screen_coord[0]), int(screen_coord[1])
+    x1, y1 = int(screenshot_coord_back[0] * width1), int(screenshot_coord_back[1] * height1)
+
+    draw_full = ImageDraw.Draw(img_full)
+    r = 12
+    draw_full.ellipse([(x_screen - r, y_screen - r), (x_screen + r, y_screen + r)], fill=(255,0,0,200), outline=(0,0,0,255))
+    draw_full.text((x_screen + r, y_screen), "screen coord", fill=(255,0,0,255))
+
+    draw1 = ImageDraw.Draw(img1)
+    draw1.ellipse([(x1 - r, y1 - r), (x1 + r, y1 + r)], fill=(0,0,255,200), outline=(0,0,0,255))
+    draw1.text((x1 + r, y1), "screenshot coord", fill=(0,0,255,255))
+
+    # Create a new image side by side
+    total_width = img_full.width + img1.width
+    max_height = max(img_full.height, img1.height)
+    combined = Image.new("RGBA", (total_width, max_height), (255,255,255,255))
+    combined.paste(img_full, (0, 0))
+    combined.paste(img1, (img_full.width, 0))
+
+    # Draw an arrow from the point in img_full to the point in img1
+    arrow_draw = ImageDraw.Draw(combined)
+    start = (x_screen, y_screen)
+    end = (x1 + img_full.width, y1)
+    arrow_draw.line([start, end], fill=(0,128,0,255), width=3)
+    # Arrowhead
+    def draw_arrowhead(draw, start, end, color, size=15):
+        """Draw the two barbs of an arrowhead at `end`, pointing away from `start`."""
+        import math
+        angle = math.atan2(end[1] - start[1], end[0] - start[0])
+        for a in [math.pi/8, -math.pi/8]:
+            x = end[0] - size * math.cos(angle + a)
+            y = end[1] - size * math.sin(angle + a)
+            draw.line([end, (x, y)], fill=color, width=3)
+    draw_arrowhead(arrow_draw, start, end, (0,128,0,255))
+
+    combined.save("coord_mapping_demo.png")
+    print("Saved coordinate mapping demo to coord_mapping_demo.png")
+
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/notebooks/diorama/draw.py b/notebooks/diorama/draw.py
index 664c461e..3903479c 100644
--- a/notebooks/diorama/draw.py
+++ b/notebooks/diorama/draw.py
@@ -465,8 +465,14 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D
         _draw_layer(cg_context, first_pass_windows, app_source_rect, app_target_rect)
 
         hitboxes.append({
-            "hitbox": [app_source_rect.origin.x, app_source_rect.origin.y, app_source_rect.size.width, app_source_rect.size.height],
-            "target": [app_target_rect.origin.x, app_target_rect.origin.y, app_target_rect.size.width, app_target_rect.size.height]
+            # Both rects are [x0, y0, x1, y1] corners, the layout diorama.py unpacks.
+            "hitbox": [0, 0, app_bounds["width"], app_bounds["height"]],
+            "target": [
+                app_source_rect.origin.x,
+                app_source_rect.origin.y,
+                app_source_rect.origin.x + app_bounds["width"],
+                app_source_rect.origin.y + app_bounds["height"]
+            ]
         })
 
         # --- SECOND PASS: menubar ---