fixed hitbox logic

This commit is contained in:
Dillon DuPont
2025-05-13 12:50:53 -04:00
parent a539000a7f
commit c8b3ce1e26
2 changed files with 114 additions and 7 deletions

View File

@@ -73,6 +73,7 @@ class Diorama:
class Interface:
def __init__(self, diorama):
    """Bind the interface to its diorama and start with an empty hitbox cache.

    Args:
        diorama: The object this interface acts on; stored as-is on
            ``self._diorama``.
    """
    self._diorama = diorama
    # Hitbox records consulted by the coordinate-conversion helpers; a fresh
    # list per instance so instances never share cached state.
    self.hitboxes = []
async def screenshot(self, save_to_disk=False, output_dir=None, take_focus=True):
Diorama._ensure_scheduler()
@@ -89,13 +90,114 @@ class Diorama:
},
"future": future
})
return await future
result, img = await future
# Store hitboxes after screenshot
self.hitboxes = result.get("hitboxes", [])
return result, img
async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
    """Map a screenshot-relative point through the first usable ``"hitbox"`` rect.

    ``(x, y)`` is treated as a fractional offset into each candidate rect
    ``[x0, y0, x1, y1]``: the mapped point is ``x0 + x * (x1 - x0)`` and
    ``y0 + y * (y1 - y0)``. The first rect that contains its own mapped point
    wins.

    Args:
        x: Horizontal screenshot-relative coordinate.
        y: Vertical screenshot-relative coordinate.

    Returns:
        The mapped ``(abs_x, abs_y)`` for the first matching rect, or the
        unchanged ``(x, y)`` when no entry matches.
    """
    # NOTE(review): only the "hitbox" rect is read here; the "target" rect is
    # never consulted, so whether the result is really in absolute screen
    # space depends on how the producer fills "hitbox" -- confirm.
    if not self.hitboxes:
        # Nothing cached yet: take a screenshot, then re-read self.hitboxes.
        await self.screenshot()

    for entry in self.hitboxes:
        rect = entry.get("hitbox")
        # Skip entries without a well-formed four-value rect.
        if not rect or len(rect) != 4:
            continue
        x0, y0, x1, y1 = rect
        abs_x = x0 + x * (x1 - x0)
        abs_y = y0 + y * (y1 - y0)
        # Accept only when the mapped point lies inside this rect (edges included).
        if x0 <= abs_x <= x1 and y0 <= abs_y <= y1:
            return abs_x, abs_y
    return x, y
async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, float]:
    """Normalise an absolute point against the first ``"target"`` rect containing it.

    Each candidate rect is ``[x0, y0, x1, y1]``. When ``(x, y)`` lies inside
    one (edges included), the result is the point's fractional position
    within that rect.

    Args:
        x: Horizontal absolute coordinate.
        y: Vertical absolute coordinate.

    Returns:
        ``(rel_x, rel_y)`` for the first containing rect, each in ``[0, 1]``,
        or the unchanged ``(x, y)`` when no rect contains the point.
    """
    if not self.hitboxes:
        # Nothing cached yet: take a screenshot, then re-read self.hitboxes.
        await self.screenshot()

    for entry in self.hitboxes:
        rect = entry.get("target")
        # Skip entries without a well-formed four-value rect.
        if not rect or len(rect) != 4:
            continue
        x0, y0, x1, y1 = rect
        if not (x0 <= x <= x1 and y0 <= y <= y1):
            continue
        width = x1 - x0
        height = y1 - y0
        # A zero-extent axis cannot be normalised; report 0.0 instead of dividing by zero.
        rel_x = (x - x0) / width if width else 0.0
        rel_y = (y - y0) / height if height else 0.0
        return rel_x, rel_y
    return x, y
async def main():
    """Render a side-by-side picture of one coordinate round trip and save it.

    Captures a full-screen image and a ``desktop1`` screenshot, maps the
    screenshot centre ``(0.5, 0.5)`` with ``to_screen_coordinates``, maps that
    result back with ``to_screenshot_coordinates``, marks both points, joins
    the two images with an arrow, and writes ``coord_mapping_demo.png`` to the
    current directory.
    """
    import math

    from PIL import Image, ImageDraw
    from draw import capture_all_apps

    desktop1 = Diorama.create_from_apps(["Discord", "Notes"])
    desktop2 = Diorama.create_from_apps(["Google Chrome"])
    # NOTE(review): the results of these two captures are discarded and
    # desktop2 is not used again; kept because the calls may matter for their
    # side effects -- confirm whether they are still needed.
    await desktop1.interface.screenshot()
    await desktop2.interface.screenshot()

    # Full-screen capture (no app whitelist) and the desktop1 capture; only the
    # images are used below.
    _, img_full = capture_all_apps()
    _, img1 = await desktop1.interface.screenshot()

    # Round-trip the centre of the screenshot through both converters.
    test_screenshot_coord = (0.5, 0.5)
    screen_coord = await desktop1.interface.to_screen_coordinates(*test_screenshot_coord)
    screenshot_coord_back = await desktop1.interface.to_screenshot_coordinates(*screen_coord)

    img_full = img_full.convert("RGBA")
    img1 = img1.convert("RGBA")
    width1, height1 = img1.size

    # The forward result is used as pixel coordinates on the full image; the
    # round-tripped result is scaled by the desktop1 image size.
    x_screen, y_screen = int(screen_coord[0]), int(screen_coord[1])
    x1 = int(screenshot_coord_back[0] * width1)
    y1 = int(screenshot_coord_back[1] * height1)

    r = 12  # marker radius in pixels
    draw_full = ImageDraw.Draw(img_full)
    draw_full.ellipse(
        [(x_screen - r, y_screen - r), (x_screen + r, y_screen + r)],
        fill=(255, 0, 0, 200),
        outline=(0, 0, 0, 255),
    )
    draw_full.text((x_screen + r, y_screen), "screen coord", fill=(255, 0, 0, 255))

    draw1 = ImageDraw.Draw(img1)
    draw1.ellipse(
        [(x1 - r, y1 - r), (x1 + r, y1 + r)],
        fill=(0, 0, 255, 200),
        outline=(0, 0, 0, 255),
    )
    draw1.text((x1 + r, y1), "screenshot coord", fill=(0, 0, 255, 255))

    # Lay the two images out side by side on a white canvas.
    total_width = img_full.width + img1.width
    max_height = max(img_full.height, img1.height)
    combined = Image.new("RGBA", (total_width, max_height), (255, 255, 255, 255))
    combined.paste(img_full, (0, 0))
    combined.paste(img1, (img_full.width, 0))

    # Arrow from the point on the full image to the point on the desktop1
    # image, whose x is shifted by the width of the left-hand image.
    arrow_draw = ImageDraw.Draw(combined)
    start = (x_screen, y_screen)
    end = (x1 + img_full.width, y1)
    arrow_draw.line([start, end], fill=(0, 128, 0, 255), width=3)

    def draw_arrowhead(draw, start, end, color, size=15):
        """Draw two short barbs at ``end``, angled pi/8 either side of the shaft."""
        angle = math.atan2(end[1] - start[1], end[0] - start[0])
        for a in [math.pi / 8, -math.pi / 8]:
            x = end[0] - size * math.cos(angle + a)
            y = end[1] - size * math.sin(angle + a)
            draw.line([end, (x, y)], fill=color, width=3)

    draw_arrowhead(arrow_draw, start, end, (0, 128, 0, 255))
    combined.save("coord_mapping_demo.png")
    print("Saved coordinate mapping demo to coord_mapping_demo.png")
if __name__ == "__main__":
    # Script entry point: run the coordinate-mapping demo on a new event loop.
    asyncio.run(main())

View File

@@ -465,8 +465,13 @@ def draw_desktop_screenshot(app_whitelist: List[str] = None, all_windows: List[D
_draw_layer(cg_context, first_pass_windows, app_source_rect, app_target_rect)
hitboxes.append({
"hitbox": [app_source_rect.origin.x, app_source_rect.origin.y, app_source_rect.size.width, app_source_rect.size.height],
"target": [app_target_rect.origin.x, app_target_rect.origin.y, app_target_rect.size.width, app_target_rect.size.height]
"hitbox": [0, 0, app_bounds["width"], app_bounds["height"]],
"target": [
app_source_rect.origin.x,
app_source_rect.origin.y,
app_source_rect.origin.x + app_bounds["width"],
app_source_rect.origin.y + app_bounds["height"]
]
})
# --- SECOND PASS: menubar ---