Move metadata to toc header, update fix som docs, add som diagram.

This commit is contained in:
Morgan Dean
2025-07-10 14:50:11 -07:00
parent 6ccf507c41
commit 9f2aa0fc0b
13 changed files with 341 additions and 235 deletions

View File

@@ -5,6 +5,8 @@ pypi: cua-computer
macos: true
windows: true
linux: true
github:
- https://github.com/trycua/cua/tree/main/libs/python/agent
---
import { buttonVariants } from 'fumadocs-ui/components/ui/button';
@@ -61,7 +63,7 @@ pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-posit
### Basic Usage
Here's a simple example to get you started with CUA Agent:
Here's a simple example to get you started with CUA Agent. It instructs the agent to open a text editor and write "Hello World."
```python
from cua_agent import ComputerAgent, AgentLoop, LLM, LLMProvider
@@ -86,6 +88,8 @@ async with Computer() as computer:
### Multi-Step Workflow
This example defines multiple tasks for the agent to complete:
```python
async with Computer() as computer:
# Create agent with your preferred provider
@@ -114,6 +118,8 @@ async with Computer() as computer:
### Alternative Model Providers
You may use different models with the agent library -- below are a couple of alternatives that we already support.
```python
# Anthropic Claude
agent = ComputerAgent(

View File

@@ -5,6 +5,8 @@ pypi: cua-computer-server
macos: true
linux: true
windows: true
github:
- https://github.com/trycua/cua/tree/main/libs/python/computer-server
---
import { buttonVariants } from 'fumadocs-ui/components/ui/button';

View File

@@ -6,6 +6,9 @@ windows: true
linux: true
pypi: cua-computer
npm: '@trycua/computer'
github:
- https://github.com/trycua/cua/tree/main/libs/python/computer
- https://github.com/trycua/cua/tree/main/libs/typescript/computer
---
import { Tabs, Tab } from 'fumadocs-ui/components/tabs';

View File

@@ -6,6 +6,9 @@ npm: '@trycua/core'
macos: true
windows: true
linux: true
github:
- https://github.com/trycua/cua/tree/main/libs/python/core
- https://github.com/trycua/cua/tree/main/libs/typescript/core
---
import { buttonVariants } from 'fumadocs-ui/components/ui/button';

View File

@@ -11,6 +11,12 @@ import { Tabs, Tab } from 'fumadocs-ui/components/tabs';
http://localhost:7777
```
<Callout type="info">
The HTTP API service runs on port `7777` by default. If you'd like to use a
different port, pass the `--port` option during installation or when running
`lume serve`.
</Callout>
## Endpoints
### Create VM

View File

@@ -3,6 +3,8 @@ title: Lume
description: A lightweight Command Line Interface and local API server for creating, running and managing macOS and Linux virtual machines.
macos: true
linux: true
github:
- https://github.com/trycua/cua/tree/main/libs/lume
---
import Link from 'next/link';
@@ -13,7 +15,7 @@ import { ChevronRight } from 'lucide-react';
# Lume
Lume is a lightweight Command Line Interface and local API server for creating, running and managing **macOS and Linux virtual machines** with near-native performance on Apple Silicon, using Apple's `Virtualization.Framework`.
Lume is a lightweight Command Line Interface and local API server for creating, running and managing **macOS and Linux virtual machines** with near-native performance on Apple Silicon, using Apple's [`Virtualization.Framework`](https://developer.apple.com/documentation/virtualization).
## Quick Start
@@ -58,9 +60,9 @@ By default, Lume is installed as a background service that starts automatically
libraries that rely on the Lume API (such as the Computer-Use Agent).
</Callout>
You can also download the `lume.pkg.tar.gz` archive from the [latest release](https://github.com/trycua/lume/releases), extract it, and install the package manually.
You can also download the `lume.pkg.tar.gz` archive from the [latest release](https://github.com/trycua/cua/releases?q=lume&expanded=true), extract it, and install the package manually.
## Getting Started
## Using Lume
Once installed, you can start using Lume with these common workflows:
@@ -76,6 +78,11 @@ lume run macos-sequoia-vanilla:latest
lume run ubuntu-noble-vanilla:latest
```
<Callout type="info">
We provide [prebuilt VM images](#prebuilt-images) in our [ghcr
registry](https://github.com/orgs/trycua/packages).
</Callout>
</Step>
<Step>
@@ -111,7 +118,18 @@ lume stop my-vm
</Step>
</Steps>
## Usage
## Prebuilt Images
Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages).
| Image | Tag | Description | Logical Size |
| ----------------------- | ------------------- | ----------------------------------------------------------------------------------------------- | ------------ |
| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 20GB |
| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 22GB |
| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 24GB |
| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB |
## Lume CLI
```bash
lume <command>
@@ -212,22 +230,9 @@ Command Options:
--port <port> Port to listen on (default: 7777)
```
## Examples
## Common Workflows
### Prebuilt Images
Pre-built images are available in the registry [ghcr.io/trycua](https://github.com/orgs/trycua/packages).
| Image | Tag | Description | Logical Size |
| ----------------------- | ------------------- | ----------------------------------------------------------------------------------------------- | ------------ |
| `macos-sequoia-vanilla` | `latest`, `15.2` | macOS Sequoia 15.2 image | 20GB |
| `macos-sequoia-xcode` | `latest`, `15.2` | macOS Sequoia 15.2 image with Xcode command line tools | 22GB |
| `macos-sequoia-cua` | `latest`, `15.3` | macOS Sequoia 15.3 image compatible with the Computer interface | 24GB |
| `ubuntu-noble-vanilla` | `latest`, `24.04.1` | [Ubuntu Server for ARM 24.04.1 LTS](https://ubuntu.com/download/server/arm) with Ubuntu Desktop | 20GB |
### Common Workflows
#### Development Environment Setup
### Development Environment Setup
```bash
# Create a development VM with more resources
@@ -237,7 +242,7 @@ lume create dev-vm --cpu 6 --memory 12GB --disk-size 100GB
lume run dev-vm --shared-dir ~/Projects:rw
```
#### Testing Different macOS Versions
### Testing Different macOS Versions
```bash
# Pull and run different macOS versions
@@ -248,7 +253,7 @@ lume run macos-sequoia-vanilla:latest
lume clone my-vm my-vm-test
```
#### File Sharing Examples
### File Sharing Examples
```bash
# Share a read-only directory
@@ -271,7 +276,7 @@ lume serve
```
<Callout type="info">
<p>Read the doucmentation on the API server endpoints.</p>
<p>Read the documentation on the local API server.</p>
<Link
href="/home/libraries/lume/http-api"
className={cn(
@@ -280,7 +285,7 @@ lume serve
}),
'no-underline h-10'
)}>
Lume API Documentation
Lume API Server Documentation
<ChevronRight size={18} />
</Link>
</Callout>

View File

@@ -3,6 +3,8 @@ title: Lumier
description: Run macOS and Linux virtual machines effortlessly in Docker containers with browser-based VNC access.
macos: true
linux: true
github:
- https://github.com/trycua/cua/tree/main/libs/lumier
---
import { buttonVariants } from 'fumadocs-ui/components/ui/button';

View File

@@ -5,6 +5,8 @@ pypi: cua-mcp-server
macos: true
linux: true
windows: true
github:
- https://github.com/trycua/cua/tree/main/libs/python/mcp-server
---
import { buttonVariants } from 'fumadocs-ui/components/ui/button';
@@ -92,6 +94,7 @@ Once configured, you can ask Claude to perform computer tasks:
"Open Safari and navigate to google.com"
"Click on the search bar and type 'weather'"
```
</Step>
<Step>
@@ -106,6 +109,7 @@ Customize the MCP server behavior with these environment variables:
| `CUA_MODEL_NAME` | Model name to use | None (provider default) |
| `CUA_PROVIDER_BASE_URL` | Base URL for provider API | None |
| `CUA_MAX_IMAGES` | Maximum number of images to keep in context | 3 |
</Step>
</Steps>

View File

@@ -5,6 +5,8 @@ macos: true
windows: true
linux: true
pypi: cua-som
github:
- https://github.com/trycua/cua/tree/main/libs/python/som
---
import { buttonVariants } from 'fumadocs-ui/components/ui/button';
@@ -32,17 +34,17 @@ import { ChevronRight } from 'lucide-react';
## Installation
<Callout type="warning">
Som requires Python 3.11 or higher. For best performance, use macOS with Apple
Silicon.
</Callout>
### Install from PyPI
```bash
pip install cua-som
```
<Callout type="warning">
Som requires Python 3.11 or higher. For best performance, use macOS with Apple
Silicon.
</Callout>
### Install from Source
```bash
@@ -59,11 +61,10 @@ pip install -e .
### System Requirements
| Platform | Hardware | Detection Time |
| ------------- | ------------------------ | -------------- |
| macOS | Apple Silicon (M1/M2/M3) | ~0.4s |
| Linux/Windows | NVIDIA GPU | ~0.6s |
| Any | CPU only | ~1.3s |
| Platform | Hardware | Detection Time |
| -------- | ------------------------ | -------------- |
| macOS | Apple Silicon (M1/M2/M3) | ~0.4s |
| Any | CPU only | ~1.3s |
## Getting Started
@@ -80,11 +81,19 @@ parser = OmniParser()
# Load and process an image
image = Image.open("screenshot.png")
result = parser.parse(image)
result = parser.parse(
image,
box_threshold=0.3, # Confidence threshold
iou_threshold=0.1, # Overlap threshold
use_ocr=True # Enable text detection
)
# Print detected elements
for elem in result.elements:
print(f"{elem.type}: {elem.content or 'icon'} at {elem.bbox.coordinates}")
if elem.type == "icon":
print(f"Icon: confidence={elem.confidence:.3f}, bbox={elem.bbox.coordinates}")
else: # text
print(f"Text: '{elem.content}', confidence={elem.confidence:.3f}")
```
### Advanced Configuration
@@ -97,187 +106,87 @@ result = parser.parse(
box_threshold=0.3, # Confidence threshold (0.0-1.0)
iou_threshold=0.1, # Overlap threshold (0.0-1.0)
use_ocr=True, # Enable text detection
ocr_engine="easyocr" # OCR engine choice
)
```
### Working with Results
```python
# Filter by element type
icons = [e for e in result.elements if e.type == "icon"]
texts = [e for e in result.elements if e.type == "text"]
# Get high-confidence detections
high_conf = [e for e in result.elements if e.confidence > 0.8]
# Access bounding boxes
for elem in result.elements:
x, y, w, h = elem.bbox.coordinates
print(f"Element at ({x}, {y}) with size {w}x{h}")
```
<Callout>
💡 **Tip**: Check out our [interactive examples](#examples) below to see Som
in action with real UI screenshots.
</Callout>
## Configuration Guide
### Detection Parameters
### Box Thresholds
<Cards>
<Card
title="Box Threshold"
description="Controls detection confidence (default: 0.3)">
- **Higher values (0.4-0.5)**: More precise, fewer false positives - **Lower
values (0.1-0.2)**: More detections, may include noise - **Recommended**:
0.3 for balanced performance
</Card>
Controls detection confidence (default: 0.3)
<Card
title="IOU Threshold"
description="Controls overlap handling (default: 0.1)">
- **Lower values (0.05-0.1)**: Aggressive merging of overlaps - **Higher
values (0.3-0.5)**: Keeps more overlapping boxes - **Recommended**: 0.1 for
dense UIs
</Card>
</Cards>
- **Higher values (0.4-0.5)**: More precise, fewer false positives
- **Lower values (0.1-0.2)**: More detections, may include noise
- **Recommended**: 0.3 for balanced performance
### OCR Settings
### Intersection Over Union (IOU) Thresholds
| Setting | Default | Description |
| -------------- | --------- | ------------------------ |
| Engine | `easyocr` | OCR engine to use |
| Languages | `['en']` | Supported languages |
| GPU | `auto` | Enable GPU acceleration |
| Timeout | `5s` | Maximum processing time |
| Min Confidence | `0.5` | Text detection threshold |
Set the `iou_threshold` parameter to control when overlapping element boxes should be merged into a single detection. A value of 0.1-0.2 is recommended for most use cases. Higher values will require more overlap before merging occurs.
## Performance Benchmarks
<div className="flex gap-x-6">
### Hardware Acceleration
<Mermaid
chart="
graph LR
A[Input Image] --> B{Hardware Detection}
B -->|Apple Silicon| C[MPS Backend]
B -->|NVIDIA GPU| D[CUDA Backend]
B -->|No GPU| E[CPU Backend]
C --> F[Multi-scale\n~0.4s]
D --> G[Multi-scale\n~0.6s]
E --> H[Single-scale\n~1.3s]
"
<IOU
title="Low Overlap (Keep Both)"
description="When boxes have minimal overlap (IOU ~ 0.05), both detections are kept as separate elements."
rect1={{
left: 30,
top: 30,
width: 60,
height: 50,
fill: 'rgba(0, 0, 255, 0.6)',
name: 'box1',
}}
rect2={{
left: 80,
top: 70,
width: 60,
height: 50,
fill: 'rgba(255, 165, 0, 0.6)',
name: 'box2',
}}
/>
### Optimization Details
<IOU
title="High Overlap (Merge)"
description="When boxes overlap significantly (IOU ~ 0.4), they are merged into a single detection to avoid duplicates."
rect1={{
left: 30,
top: 30,
width: 80,
height: 60,
fill: 'rgba(0, 0, 255, 0.6)',
name: 'box1',
}}
rect2={{
left: 50,
top: 40,
width: 80,
height: 60,
fill: 'rgba(255, 165, 0, 0.6)',
name: 'box2',
}}
/>
</div>
## Performance
<Cards>
<Card title="MPS (Apple Silicon)" description="Best performance on macOS">
- Multi-scale detection (640px, 1280px, 1920px) - Test-time augmentation -
Half-precision (FP16) - ~0.4s average time
<Card title="Metal Performance Shaders (Apple Silicon)" description="Best performance on macOS">
- Multi-scale detection (640px, 1280px, 1920px)
- Test-time augmentation enabled
- Half-precision (FP16)
- ~0.4s average detection time
- Best for production use
</Card>
{' '}
<Card title="CUDA (NVIDIA)" description="High performance on Linux/Windows">
- Multi-scale detection - Mixed precision - ~0.6s average time
</Card>
<Card title="CPU Fallback" description="Universal compatibility">
- Single-scale detection (1280px) - Full precision (FP32) - ~1.3s average
time
- Single-scale detection (1280px)
- Full precision (FP32)
- ~1.3s average time
- Reliable fallback option
</Card>
</Cards>
## Examples
### Example 1: Screenshot Analysis
Analyze a full application screenshot with visualization:
```python
from som import OmniParser
from PIL import Image
import json
# Initialize parser
parser = OmniParser()
# Load screenshot
image = Image.open("app_screenshot.png")
# Parse with visualization
result = parser.parse(image, use_ocr=True)
# Save annotated image
result.visualized_image.save("annotated_screenshot.png")
# Export results as JSON
with open("ui_elements.json", "w") as f:
json.dump(result.to_dict(), f, indent=2)
```
### Example 2: UI Automation Helper
Find and interact with specific UI elements:
```python
def find_button(parser, image, button_text):
"""Find a button by its text content"""
result = parser.parse(image, use_ocr=True)
for elem in result.elements:
if elem.type == "text" and button_text.lower() in elem.content.lower():
return elem.bbox.coordinates
return None
# Usage
button_coords = find_button(parser, screenshot, "Submit")
if button_coords:
x, y, w, h = button_coords
click_x = x + w // 2
click_y = y + h // 2
print(f"Click at ({click_x}, {click_y})")
```
### Example 3: Batch Processing
Process multiple screenshots with performance tracking:
```python
import time
from pathlib import Path
def batch_process(image_dir, output_dir):
parser = OmniParser()
image_files = Path(image_dir).glob("*.png")
results = []
for img_path in image_files:
start = time.time()
image = Image.open(img_path)
result = parser.parse(image)
# Save annotated version
out_path = Path(output_dir) / f"{img_path.stem}_annotated.png"
result.visualized_image.save(out_path)
elapsed = time.time() - start
results.append({
"file": img_path.name,
"elements_found": len(result.elements),
"processing_time": elapsed
})
print(f"Processed {img_path.name}: {len(result.elements)} elements in {elapsed:.2f}s")
return results
```
---
<Callout type="info">

View File

@@ -13,6 +13,7 @@ export const docs = defineDocs({
schema: frontmatterSchema.extend({
pypi: z.string().optional(),
npm: z.string().optional(),
github: z.array(z.string()).optional(),
macos: z.boolean().default(false),
windows: z.boolean().default(false),
linux: z.boolean().default(false),

View File

@@ -14,7 +14,7 @@ import {
DocsTitle,
} from 'fumadocs-ui/page';
import { cn } from 'fumadocs-ui/utils/cn';
import { ChevronDown } from 'lucide-react';
import { ChevronDown, CodeXml, ExternalLink } from 'lucide-react';
import Link from 'next/link';
import { notFound, redirect } from 'next/navigation';
@@ -46,6 +46,7 @@ export default async function Page(props: {
const linux = page.data.linux;
const pypi = page.data.pypi;
const npm = page.data.npm;
const github = page.data.github;
const MDXContent = page.data.body;
@@ -55,9 +56,9 @@ export default async function Page(props: {
if (!hasAnyPlatform && !pypi) return null;
return (
<div className="flex flex-col gap-2 mx-4">
<div className="flex flex-col gap-2">
{hasAnyPlatform && (
<div className="flex flex-row gap-2 items-center mx-auto dark:text-neutral-400">
<div className="flex flex-row gap-2 items-left dark:text-neutral-400">
{windows && (
<svg
xmlns="http://www.w3.org/2000/svg"
@@ -90,49 +91,106 @@ export default async function Page(props: {
)}
</div>
)}
{pypi && (
<a target="_blank" href={`https://pypi.org/project/${pypi}/`}>
<img
src={`https://img.shields.io/pypi/v/${pypi}?color=blue`}
className="h-5"
alt="PyPI"
/>
</a>
)}
{npm && (
<a target="_blank" href={`https://www.npmjs.com/package/${npm}`}>
<img
src={`https://img.shields.io/npm/v/${npm}?color=bf4c4b`}
className="h-5"
alt="NPM"
/>
</a>
)}
{slug.includes('libraries') && (
<a
className={cn(
buttonVariants({
color: 'secondary',
size: 'sm',
className:
'gap-2 [&_svg]:size-3.5 [&_svg]:text-fd-muted-foreground',
}),
''
)}
href={`/api/${page.data.title.toLowerCase()}`}>
Reference
</a>
)}
<div className="flex flex-row gap-2 items-left">
{pypi && (
<a target="_blank" href={`https://pypi.org/project/${pypi}/`}>
<img
src={`https://img.shields.io/pypi/v/${pypi}?color=blue`}
className="h-5"
alt="PyPI"
/>
</a>
)}
{npm && (
<a target="_blank" href={`https://www.npmjs.com/package/${npm}`}>
<img
src={`https://img.shields.io/npm/v/${npm}?color=bf4c4b`}
className="h-5"
alt="NPM"
/>
</a>
)}
</div>
</div>
);
};
/**
 * Builds the header passed to `DocsPage` via `tableOfContent.header`.
 *
 * Renders, top to bottom:
 *  - the `PlatformIcons` block,
 *  - a "Source" control: a direct external link when the page's `github`
 *    frontmatter has exactly one URL, or a popover listing every URL when it
 *    has several (nothing when `github` is missing or empty),
 *  - a "Reference" link to `/api/<lower-cased page title>` for pages whose
 *    slug includes `libraries`,
 *  - a horizontal rule.
 */
const tocHeader = () => {
  // Shared styling for the ghost-style buttons. Kept as a single literal with
  // a plain `&` (not `&amp;`) so Tailwind's source scanner sees exactly the
  // class names that end up in the DOM.
  const linkButtonClass =
    'inline-flex gap-2 w-fit items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&_svg]:size-5 text-fd-muted-foreground md:[&_svg]:size-4.5';

  // GitHub mark, shared by the single-link and popover variants.
  const githubIcon = (
    <svg role="img" viewBox="0 0 24 24" fill="currentColor">
      <path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path>
    </svg>
  );

  return (
    <div className="w-fit">
      <PlatformIcons />
      <div className="flex gap-2 mt-2">
        {github &&
          github.length > 0 &&
          (github.length === 1 ? (
            <a
              href={github[0]}
              rel="noreferrer noopener"
              target="_blank"
              className={linkButtonClass}
              aria-label="Source"
              data-active="false">
              {githubIcon}
              Source
              <ExternalLink className="w-4 h-4 ml-auto" />
            </a>
          ) : (
            <Popover>
              <PopoverTrigger className={linkButtonClass}>
                {githubIcon}
                Source
                <ChevronDown className="h-4 w-4" />
              </PopoverTrigger>
              <PopoverContent className="w-48 p-1">
                <div className="flex flex-col gap-1">
                  {github.map((link, index) => (
                    <a
                      // The URL is a stable identity for the entry; the index
                      // is only used for the fallback label below.
                      key={link}
                      href={link}
                      rel="noreferrer noopener"
                      target="_blank"
                      className="inline-flex gap-2 w-full items-center rounded-md p-2 text-sm hover:bg-fd-accent hover:text-fd-accent-foreground">
                      {/* Label each entry by the language segment found in its URL. */}
                      {link.includes('python')
                        ? 'Python'
                        : link.includes('typescript')
                          ? 'TypeScript'
                          : `Source ${index + 1}`}
                      <ExternalLink className="w-4 h-4 ml-auto" />
                    </a>
                  ))}
                </div>
              </PopoverContent>
            </Popover>
          ))}
        {slug.includes('libraries') && (
          <a
            className={linkButtonClass}
            href={`/api/${page.data.title.toLowerCase()}`}>
            <CodeXml size={12} />
            Reference
          </a>
        )}
      </div>
      <hr className="my-2 border-t border-fd-border" />
    </div>
  );
};
return (
<DocsPage toc={page.data.toc} full={page.data.full}>
<DocsPage
toc={page.data.toc}
tableOfContent={{ header: tocHeader() }}
full={page.data.full}>
<div className="flex flex-row w-full items-start">
<div className="flex-1">
<div className="flex flex-row w-full">
<DocsTitle>{page.data.title}</DocsTitle>
<div className="ml-auto">
{apiSection && versionItems.length > 1 && (
<Popover>
@@ -193,11 +251,10 @@ export default async function Page(props: {
)}
</div>
</div>
<DocsDescription className=" text-md mt-1">
<DocsDescription className="text-md mt-1">
{page.data.description}
</DocsDescription>
</div>
<PlatformIcons />
</div>
<DocsBody>
<MDXContent

106
docs/src/components/iou.tsx Normal file
View File

@@ -0,0 +1,106 @@
'use client';
import React, { useRef, useEffect, useState, useCallback } from 'react';
/** A labelled, filled rectangle drawn on the IOU demo canvas. */
interface Rectangle {
  /** X coordinate of the rectangle's left edge, in canvas pixels. */
  left: number;
  /** Y coordinate of the rectangle's top edge, in canvas pixels. */
  top: number;
  /** Horizontal extent, in canvas pixels. */
  width: number;
  /** Vertical extent, in canvas pixels. */
  height: number;
  /** Value assigned to the canvas `fillStyle` when painting the rectangle. */
  fill: string;
  /** Text label drawn just inside the rectangle's top-left corner. */
  name: string;
}
/** Props for the `IOU` demo component. */
interface IOUProps {
  /** Heading rendered above the canvas. */
  title: string;
  /** Explanatory text rendered below the computed IOU value. */
  description: string;
  /** First rectangle; painted first, so it sits underneath `rect2`. */
  rect1: Rectangle;
  /** Second rectangle; painted after `rect1`. */
  rect2: Rectangle;
}
/** Edge coordinates of an axis-aligned rectangle. */
interface BoundingBox {
  left: number;
  right: number;
  top: number;
  bottom: number;
}

/** Converts an origin + size rectangle into its four edge coordinates. */
const getBbox = (rect: Rectangle): BoundingBox => ({
  left: rect.left,
  right: rect.left + rect.width,
  top: rect.top,
  bottom: rect.top + rect.height,
});

/** Area of the overlap between two boxes, or 0 when they do not overlap. */
const calcIntersection = (bbox1: BoundingBox, bbox2: BoundingBox): number => {
  const x1 = Math.max(bbox1.left, bbox2.left);
  const x2 = Math.min(bbox1.right, bbox2.right);
  const y1 = Math.max(bbox1.top, bbox2.top);
  const y2 = Math.min(bbox1.bottom, bbox2.bottom);

  // Boxes that merely touch, or are disjoint, have no overlap area.
  if (x2 <= x1 || y2 <= y1) {
    return 0;
  }

  return (x2 - x1) * (y2 - y1);
};

/** Area of a rectangle. */
const calcArea = (rect: Rectangle): number => rect.width * rect.height;

/**
 * Intersection over Union of two rectangles: overlap area divided by the
 * area covered by either rectangle.
 *
 * Returns 0 when the union is empty (both rectangles have zero area) rather
 * than dividing by zero and producing `NaN`.
 */
const calcIOU = (rect1: Rectangle, rect2: Rectangle): number => {
  const intersection = calcIntersection(getBbox(rect1), getBbox(rect2));
  const union = calcArea(rect1) + calcArea(rect2) - intersection;
  return union > 0 ? intersection / union : 0;
};

/**
 * Visual demo of Intersection over Union: draws two labelled rectangles on a
 * 200x150 canvas and shows their computed IOU (three decimals) together with
 * a title and a description.
 *
 * @param title - Heading shown above the canvas.
 * @param description - Text shown below the IOU value.
 * @param rect1 - First rectangle (painted first).
 * @param rect2 - Second rectangle (painted on top of `rect1`).
 */
export default function IOU({ title, description, rect1, rect2 }: IOUProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  // Seed the state with the real value so the first (and server-rendered)
  // output already shows the correct IOU instead of a placeholder 0.000.
  const [actualIOU, setActualIOU] = useState<number>(() =>
    calcIOU(rect1, rect2)
  );

  const drawCanvas = useCallback(() => {
    const canvas = canvasRef.current;
    if (!canvas) return;

    const ctx = canvas.getContext('2d');
    if (!ctx) return;

    // Clear canvas
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    // Draw rectangles
    [rect1, rect2].forEach((rect) => {
      ctx.fillStyle = rect.fill;
      ctx.fillRect(rect.left, rect.top, rect.width, rect.height);

      ctx.strokeStyle = '#000';
      ctx.lineWidth = 2;
      ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);

      ctx.fillStyle = '#000';
      // The canvas font must be a full CSS font shorthand (size AND family);
      // a bare '12px' is invalid and is silently ignored.
      ctx.font = '12px sans-serif';
      ctx.fillText(rect.name, rect.left + 5, rect.top + 15);
    });
  }, [rect1, rect2]);

  useEffect(() => {
    // Keep the displayed value in sync when the rectangles change. Setting an
    // unchanged number is a no-op for React, so this does not loop.
    setActualIOU(calcIOU(rect1, rect2));
    drawCanvas();
  }, [drawCanvas, rect1, rect2]);

  return (
    <div className="">
      <h3 className="text-sm font-semibold ">{title}</h3>
      <div className="flex items-start gap-6">
        <div>
          <canvas
            ref={canvasRef}
            width={200}
            height={150}
            className="border bg-white rounded-md"
          />
          <div className="mt-2 text-sm">
            <div className="font-mono mb-2">IOU = {actualIOU.toFixed(3)}</div>
            <span className="">{description}</span>
          </div>
        </div>
      </div>
    </div>
  );
}

View File

@@ -1,12 +1,14 @@
import defaultMdxComponents from 'fumadocs-ui/mdx';
import type { MDXComponents } from 'mdx/types';
import { Mermaid } from './components/mermaid';
import IOU from './components/iou';
/**
 * Returns the component map used when rendering MDX content.
 *
 * Starts from the fumadocs default components, adds the site's `Mermaid` and
 * `IOU` components, and finally applies any caller-supplied overrides, which
 * take precedence over everything else.
 */
export function getMDXComponents(components?: MDXComponents): MDXComponents {
  const siteComponents: MDXComponents = {
    ...defaultMdxComponents,
    Mermaid,
    IOU,
  };

  if (!components) {
    return siteComponents;
  }

  return { ...siteComponents, ...components };
}