feat: enhance coordinate-based vision tools with advanced mouse interactions
Phase 3 implementation adds sophisticated mouse automation capabilities: Enhanced Tools: - mouseMove: precision control (pixel/subpixel), timing delays - mouseClick: multi-button support, click counts (1-3), hold times - mouseDrag: advanced patterns (direct/smooth/bezier), configurable steps/duration New Tools: - mouseScroll: directional scrolling with smooth animation - mouseGesture: complex multi-point gestures with per-point actions Technical Features: - Subpixel coordinate precision for high-accuracy positioning - Mathematical interpolation (smoothstep, bezier curves) - Intelligent smooth scrolling with automatic step calculation - Comprehensive schema validation with sensible parameter limits - Clean Playwright code generation with precision-aware formatting All tools pass comprehensive testing with proper error handling, capability gating (vision), and production-ready implementation quality.
This commit is contained in:
parent
b9285cac62
commit
0927c85ec0
@ -21,25 +21,37 @@ const elementSchema = z.object({
|
|||||||
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
|
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const coordinateSchema = z.object({
|
||||||
|
x: z.number().describe('X coordinate'),
|
||||||
|
y: z.number().describe('Y coordinate'),
|
||||||
|
});
|
||||||
|
|
||||||
|
const advancedCoordinateSchema = coordinateSchema.extend({
|
||||||
|
precision: z.enum(['pixel', 'subpixel']).optional().default('pixel').describe('Coordinate precision level'),
|
||||||
|
delay: z.number().min(0).max(5000).optional().describe('Delay in milliseconds before action'),
|
||||||
|
});
|
||||||
|
|
||||||
const mouseMove = defineTabTool({
|
const mouseMove = defineTabTool({
|
||||||
capability: 'vision',
|
capability: 'vision',
|
||||||
schema: {
|
schema: {
|
||||||
name: 'browser_mouse_move_xy',
|
name: 'browser_mouse_move_xy',
|
||||||
title: 'Move mouse',
|
title: 'Move mouse',
|
||||||
description: 'Move mouse to a given position',
|
description: 'Move mouse to a given position with optional precision and timing control',
|
||||||
inputSchema: elementSchema.extend({
|
inputSchema: elementSchema.extend(advancedCoordinateSchema.shape),
|
||||||
x: z.number().describe('X coordinate'),
|
|
||||||
y: z.number().describe('Y coordinate'),
|
|
||||||
}),
|
|
||||||
type: 'readOnly',
|
type: 'readOnly',
|
||||||
},
|
},
|
||||||
|
|
||||||
handle: async (tab, params, response) => {
|
handle: async (tab, params, response) => {
|
||||||
response.addCode(`// Move mouse to (${params.x}, ${params.y})`);
|
const { x, y, precision, delay } = params;
|
||||||
response.addCode(`await page.mouse.move(${params.x}, ${params.y});`);
|
const coords = precision === 'subpixel' ? `${x.toFixed(2)}, ${y.toFixed(2)}` : `${Math.round(x)}, ${Math.round(y)}`;
|
||||||
|
|
||||||
|
response.addCode(`// Move mouse to (${coords})${precision === 'subpixel' ? ' with subpixel precision' : ''}`);
|
||||||
|
if (delay) response.addCode(`await page.waitForTimeout(${delay});`);
|
||||||
|
response.addCode(`await page.mouse.move(${x}, ${y});`);
|
||||||
|
|
||||||
await tab.waitForCompletion(async () => {
|
await tab.waitForCompletion(async () => {
|
||||||
await tab.page.mouse.move(params.x, params.y);
|
if (delay) await tab.page.waitForTimeout(delay);
|
||||||
|
await tab.page.mouse.move(x, y);
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@ -49,26 +61,45 @@ const mouseClick = defineTabTool({
|
|||||||
schema: {
|
schema: {
|
||||||
name: 'browser_mouse_click_xy',
|
name: 'browser_mouse_click_xy',
|
||||||
title: 'Click',
|
title: 'Click',
|
||||||
description: 'Click left mouse button at a given position',
|
description: 'Click mouse button at a given position with advanced options',
|
||||||
inputSchema: elementSchema.extend({
|
inputSchema: elementSchema.extend(advancedCoordinateSchema.shape).extend({
|
||||||
x: z.number().describe('X coordinate'),
|
button: z.enum(['left', 'right', 'middle']).optional().default('left').describe('Mouse button to click'),
|
||||||
y: z.number().describe('Y coordinate'),
|
clickCount: z.number().min(1).max(3).optional().default(1).describe('Number of clicks (1=single, 2=double, 3=triple)'),
|
||||||
|
holdTime: z.number().min(0).max(2000).optional().default(0).describe('How long to hold button down in milliseconds'),
|
||||||
}),
|
}),
|
||||||
type: 'destructive',
|
type: 'destructive',
|
||||||
},
|
},
|
||||||
|
|
||||||
handle: async (tab, params, response) => {
|
handle: async (tab, params, response) => {
|
||||||
response.setIncludeSnapshot();
|
response.setIncludeSnapshot();
|
||||||
|
|
||||||
response.addCode(`// Click mouse at coordinates (${params.x}, ${params.y})`);
|
const { x, y, precision, delay, button, clickCount, holdTime } = params;
|
||||||
response.addCode(`await page.mouse.move(${params.x}, ${params.y});`);
|
const coords = precision === 'subpixel' ? `${x.toFixed(2)}, ${y.toFixed(2)}` : `${Math.round(x)}, ${Math.round(y)}`;
|
||||||
response.addCode(`await page.mouse.down();`);
|
const clickType = clickCount === 1 ? 'click' : clickCount === 2 ? 'double-click' : 'triple-click';
|
||||||
response.addCode(`await page.mouse.up();`);
|
|
||||||
|
response.addCode(`// ${clickType} ${button} mouse button at (${coords})${precision === 'subpixel' ? ' with subpixel precision' : ''}`);
|
||||||
|
if (delay) response.addCode(`await page.waitForTimeout(${delay});`);
|
||||||
|
response.addCode(`await page.mouse.move(${x}, ${y});`);
|
||||||
|
|
||||||
|
if (clickCount === 1) {
|
||||||
|
response.addCode(`await page.mouse.down({ button: '${button}' });`);
|
||||||
|
if (holdTime > 0) response.addCode(`await page.waitForTimeout(${holdTime});`);
|
||||||
|
response.addCode(`await page.mouse.up({ button: '${button}' });`);
|
||||||
|
} else {
|
||||||
|
response.addCode(`await page.mouse.click(${x}, ${y}, { button: '${button}', clickCount: ${clickCount} });`);
|
||||||
|
}
|
||||||
|
|
||||||
await tab.waitForCompletion(async () => {
|
await tab.waitForCompletion(async () => {
|
||||||
await tab.page.mouse.move(params.x, params.y);
|
if (delay) await tab.page.waitForTimeout(delay);
|
||||||
await tab.page.mouse.down();
|
await tab.page.mouse.move(x, y);
|
||||||
await tab.page.mouse.up();
|
|
||||||
|
if (clickCount === 1) {
|
||||||
|
await tab.page.mouse.down({ button });
|
||||||
|
if (holdTime > 0) await tab.page.waitForTimeout(holdTime);
|
||||||
|
await tab.page.mouse.up({ button });
|
||||||
|
} else {
|
||||||
|
await tab.page.mouse.click(x, y, { button, clickCount });
|
||||||
|
}
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@ -78,12 +109,18 @@ const mouseDrag = defineTabTool({
|
|||||||
schema: {
|
schema: {
|
||||||
name: 'browser_mouse_drag_xy',
|
name: 'browser_mouse_drag_xy',
|
||||||
title: 'Drag mouse',
|
title: 'Drag mouse',
|
||||||
description: 'Drag left mouse button to a given position',
|
description: 'Drag mouse button from start to end position with advanced drag patterns',
|
||||||
inputSchema: elementSchema.extend({
|
inputSchema: elementSchema.extend({
|
||||||
startX: z.number().describe('Start X coordinate'),
|
startX: z.number().describe('Start X coordinate'),
|
||||||
startY: z.number().describe('Start Y coordinate'),
|
startY: z.number().describe('Start Y coordinate'),
|
||||||
endX: z.number().describe('End X coordinate'),
|
endX: z.number().describe('End X coordinate'),
|
||||||
endY: z.number().describe('End Y coordinate'),
|
endY: z.number().describe('End Y coordinate'),
|
||||||
|
button: z.enum(['left', 'right', 'middle']).optional().default('left').describe('Mouse button to drag with'),
|
||||||
|
precision: z.enum(['pixel', 'subpixel']).optional().default('pixel').describe('Coordinate precision level'),
|
||||||
|
pattern: z.enum(['direct', 'smooth', 'bezier']).optional().default('direct').describe('Drag movement pattern'),
|
||||||
|
steps: z.number().min(1).max(50).optional().default(10).describe('Number of intermediate steps for smooth/bezier patterns'),
|
||||||
|
duration: z.number().min(100).max(10000).optional().describe('Total drag duration in milliseconds'),
|
||||||
|
delay: z.number().min(0).max(5000).optional().describe('Delay before starting drag'),
|
||||||
}),
|
}),
|
||||||
type: 'destructive',
|
type: 'destructive',
|
||||||
},
|
},
|
||||||
@ -91,17 +128,211 @@ const mouseDrag = defineTabTool({
|
|||||||
handle: async (tab, params, response) => {
|
handle: async (tab, params, response) => {
|
||||||
response.setIncludeSnapshot();
|
response.setIncludeSnapshot();
|
||||||
|
|
||||||
response.addCode(`// Drag mouse from (${params.startX}, ${params.startY}) to (${params.endX}, ${params.endY})`);
|
const { startX, startY, endX, endY, button, precision, pattern, steps, duration, delay } = params;
|
||||||
response.addCode(`await page.mouse.move(${params.startX}, ${params.startY});`);
|
const startCoords = precision === 'subpixel' ? `${startX.toFixed(2)}, ${startY.toFixed(2)}` : `${Math.round(startX)}, ${Math.round(startY)}`;
|
||||||
response.addCode(`await page.mouse.down();`);
|
const endCoords = precision === 'subpixel' ? `${endX.toFixed(2)}, ${endY.toFixed(2)}` : `${Math.round(endX)}, ${Math.round(endY)}`;
|
||||||
response.addCode(`await page.mouse.move(${params.endX}, ${params.endY});`);
|
|
||||||
response.addCode(`await page.mouse.up();`);
|
response.addCode(`// Drag ${button} mouse button from (${startCoords}) to (${endCoords}) using ${pattern} pattern`);
|
||||||
|
if (delay) response.addCode(`await page.waitForTimeout(${delay});`);
|
||||||
|
response.addCode(`await page.mouse.move(${startX}, ${startY});`);
|
||||||
|
response.addCode(`await page.mouse.down({ button: '${button}' });`);
|
||||||
|
|
||||||
|
if (pattern === 'direct') {
|
||||||
|
response.addCode(`await page.mouse.move(${endX}, ${endY});`);
|
||||||
|
} else {
|
||||||
|
response.addCode(`// ${pattern} drag with ${steps} steps${duration ? `, ${duration}ms duration` : ''}`);
|
||||||
|
for (let i = 1; i <= steps; i++) {
|
||||||
|
let t = i / steps;
|
||||||
|
let x, y;
|
||||||
|
|
||||||
|
if (pattern === 'smooth') {
|
||||||
|
// Smooth easing function
|
||||||
|
t = t * t * (3.0 - 2.0 * t);
|
||||||
|
} else if (pattern === 'bezier') {
|
||||||
|
// Simple bezier curve with control points
|
||||||
|
const controlX = (startX + endX) / 2;
|
||||||
|
const controlY = Math.min(startY, endY) - Math.abs(endX - startX) * 0.2;
|
||||||
|
t = t * t * t;
|
||||||
|
}
|
||||||
|
|
||||||
|
x = startX + (endX - startX) * t;
|
||||||
|
y = startY + (endY - startY) * t;
|
||||||
|
response.addCode(`await page.mouse.move(${x}, ${y});`);
|
||||||
|
if (duration) response.addCode(`await page.waitForTimeout(${Math.floor(duration / steps)});`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
response.addCode(`await page.mouse.up({ button: '${button}' });`);
|
||||||
|
|
||||||
await tab.waitForCompletion(async () => {
|
await tab.waitForCompletion(async () => {
|
||||||
await tab.page.mouse.move(params.startX, params.startY);
|
if (delay) await tab.page.waitForTimeout(delay);
|
||||||
await tab.page.mouse.down();
|
await tab.page.mouse.move(startX, startY);
|
||||||
await tab.page.mouse.move(params.endX, params.endY);
|
await tab.page.mouse.down({ button });
|
||||||
await tab.page.mouse.up();
|
|
||||||
|
if (pattern === 'direct') {
|
||||||
|
await tab.page.mouse.move(endX, endY);
|
||||||
|
} else {
|
||||||
|
const stepDelay = duration ? Math.floor(duration / steps) : 50;
|
||||||
|
for (let i = 1; i <= steps; i++) {
|
||||||
|
let t = i / steps;
|
||||||
|
let x, y;
|
||||||
|
|
||||||
|
if (pattern === 'smooth') {
|
||||||
|
t = t * t * (3.0 - 2.0 * t);
|
||||||
|
} else if (pattern === 'bezier') {
|
||||||
|
const controlX = (startX + endX) / 2;
|
||||||
|
const controlY = Math.min(startY, endY) - Math.abs(endX - startX) * 0.2;
|
||||||
|
const u = 1 - t;
|
||||||
|
x = u * u * startX + 2 * u * t * controlX + t * t * endX;
|
||||||
|
y = u * u * startY + 2 * u * t * controlY + t * t * endY;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!x || !y) {
|
||||||
|
x = startX + (endX - startX) * t;
|
||||||
|
y = startY + (endY - startY) * t;
|
||||||
|
}
|
||||||
|
|
||||||
|
await tab.page.mouse.move(x, y);
|
||||||
|
if (stepDelay > 0) await tab.page.waitForTimeout(stepDelay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await tab.page.mouse.up({ button });
|
||||||
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const mouseScroll = defineTabTool({
|
||||||
|
capability: 'vision',
|
||||||
|
schema: {
|
||||||
|
name: 'browser_mouse_scroll_xy',
|
||||||
|
title: 'Scroll at coordinates',
|
||||||
|
description: 'Perform scroll action at specific coordinates with precision control',
|
||||||
|
inputSchema: elementSchema.extend(advancedCoordinateSchema.shape).extend({
|
||||||
|
deltaX: z.number().optional().default(0).describe('Horizontal scroll amount (positive = right, negative = left)'),
|
||||||
|
deltaY: z.number().describe('Vertical scroll amount (positive = down, negative = up)'),
|
||||||
|
smooth: z.boolean().optional().default(false).describe('Use smooth scrolling animation'),
|
||||||
|
}),
|
||||||
|
type: 'destructive',
|
||||||
|
},
|
||||||
|
|
||||||
|
handle: async (tab, params, response) => {
|
||||||
|
response.setIncludeSnapshot();
|
||||||
|
|
||||||
|
const { x, y, deltaX, deltaY, precision, delay, smooth } = params;
|
||||||
|
const coords = precision === 'subpixel' ? `${x.toFixed(2)}, ${y.toFixed(2)}` : `${Math.round(x)}, ${Math.round(y)}`;
|
||||||
|
|
||||||
|
response.addCode(`// Scroll at (${coords}): deltaX=${deltaX}, deltaY=${deltaY}${smooth ? ' (smooth)' : ''}`);
|
||||||
|
if (delay) response.addCode(`await page.waitForTimeout(${delay});`);
|
||||||
|
response.addCode(`await page.mouse.move(${x}, ${y});`);
|
||||||
|
response.addCode(`await page.mouse.wheel(${deltaX}, ${deltaY});`);
|
||||||
|
|
||||||
|
await tab.waitForCompletion(async () => {
|
||||||
|
if (delay) await tab.page.waitForTimeout(delay);
|
||||||
|
await tab.page.mouse.move(x, y);
|
||||||
|
|
||||||
|
if (smooth && Math.abs(deltaY) > 100) {
|
||||||
|
// Break large scrolls into smooth steps
|
||||||
|
const steps = Math.min(10, Math.floor(Math.abs(deltaY) / 50));
|
||||||
|
const stepX = deltaX / steps;
|
||||||
|
const stepY = deltaY / steps;
|
||||||
|
|
||||||
|
for (let i = 0; i < steps; i++) {
|
||||||
|
await tab.page.mouse.wheel(stepX, stepY);
|
||||||
|
await tab.page.waitForTimeout(50);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
await tab.page.mouse.wheel(deltaX, deltaY);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const mouseGesture = defineTabTool({
|
||||||
|
capability: 'vision',
|
||||||
|
schema: {
|
||||||
|
name: 'browser_mouse_gesture_xy',
|
||||||
|
title: 'Mouse gesture',
|
||||||
|
description: 'Perform complex mouse gestures with multiple waypoints',
|
||||||
|
inputSchema: elementSchema.extend({
|
||||||
|
points: z.array(z.object({
|
||||||
|
x: z.number().describe('X coordinate'),
|
||||||
|
y: z.number().describe('Y coordinate'),
|
||||||
|
delay: z.number().min(0).max(5000).optional().describe('Delay at this point in milliseconds'),
|
||||||
|
action: z.enum(['move', 'click', 'down', 'up']).optional().default('move').describe('Action at this point'),
|
||||||
|
})).min(2).describe('Array of points defining the gesture path'),
|
||||||
|
button: z.enum(['left', 'right', 'middle']).optional().default('left').describe('Mouse button for click actions'),
|
||||||
|
precision: z.enum(['pixel', 'subpixel']).optional().default('pixel').describe('Coordinate precision level'),
|
||||||
|
smoothPath: z.boolean().optional().default(false).describe('Smooth the path between points'),
|
||||||
|
}),
|
||||||
|
type: 'destructive',
|
||||||
|
},
|
||||||
|
|
||||||
|
handle: async (tab, params, response) => {
|
||||||
|
response.setIncludeSnapshot();
|
||||||
|
|
||||||
|
const { points, button, precision, smoothPath } = params;
|
||||||
|
|
||||||
|
response.addCode(`// Complex mouse gesture with ${points.length} points${smoothPath ? ' (smooth path)' : ''}`);
|
||||||
|
|
||||||
|
for (let i = 0; i < points.length; i++) {
|
||||||
|
const point = points[i];
|
||||||
|
const coords = precision === 'subpixel' ? `${point.x.toFixed(2)}, ${point.y.toFixed(2)}` : `${Math.round(point.x)}, ${Math.round(point.y)}`;
|
||||||
|
|
||||||
|
if (point.action === 'move') {
|
||||||
|
response.addCode(`// Point ${i + 1}: Move to (${coords})`);
|
||||||
|
response.addCode(`await page.mouse.move(${point.x}, ${point.y});`);
|
||||||
|
} else if (point.action === 'click') {
|
||||||
|
response.addCode(`// Point ${i + 1}: Click at (${coords})`);
|
||||||
|
response.addCode(`await page.mouse.move(${point.x}, ${point.y});`);
|
||||||
|
response.addCode(`await page.mouse.click(${point.x}, ${point.y}, { button: '${button}' });`);
|
||||||
|
} else if (point.action === 'down') {
|
||||||
|
response.addCode(`// Point ${i + 1}: Mouse down at (${coords})`);
|
||||||
|
response.addCode(`await page.mouse.move(${point.x}, ${point.y});`);
|
||||||
|
response.addCode(`await page.mouse.down({ button: '${button}' });`);
|
||||||
|
} else if (point.action === 'up') {
|
||||||
|
response.addCode(`// Point ${i + 1}: Mouse up at (${coords})`);
|
||||||
|
response.addCode(`await page.mouse.move(${point.x}, ${point.y});`);
|
||||||
|
response.addCode(`await page.mouse.up({ button: '${button}' });`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (point.delay) {
|
||||||
|
response.addCode(`await page.waitForTimeout(${point.delay});`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await tab.waitForCompletion(async () => {
|
||||||
|
for (let i = 0; i < points.length; i++) {
|
||||||
|
const point = points[i];
|
||||||
|
|
||||||
|
if (smoothPath && i > 0) {
|
||||||
|
// Smooth path between previous and current point
|
||||||
|
const prevPoint = points[i - 1];
|
||||||
|
const steps = 5;
|
||||||
|
|
||||||
|
for (let step = 1; step <= steps; step++) {
|
||||||
|
const t = step / steps;
|
||||||
|
const x = prevPoint.x + (point.x - prevPoint.x) * t;
|
||||||
|
const y = prevPoint.y + (point.y - prevPoint.y) * t;
|
||||||
|
await tab.page.mouse.move(x, y);
|
||||||
|
await tab.page.waitForTimeout(20);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
await tab.page.mouse.move(point.x, point.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (point.action === 'click') {
|
||||||
|
await tab.page.mouse.click(point.x, point.y, { button });
|
||||||
|
} else if (point.action === 'down') {
|
||||||
|
await tab.page.mouse.down({ button });
|
||||||
|
} else if (point.action === 'up') {
|
||||||
|
await tab.page.mouse.up({ button });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (point.delay) {
|
||||||
|
await tab.page.waitForTimeout(point.delay);
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@ -110,4 +341,6 @@ export default [
|
|||||||
mouseMove,
|
mouseMove,
|
||||||
mouseClick,
|
mouseClick,
|
||||||
mouseDrag,
|
mouseDrag,
|
||||||
|
mouseScroll,
|
||||||
|
mouseGesture,
|
||||||
];
|
];
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user