feat: add image size validation to screenshot tool to prevent API errors

Implements automatic validation of screenshot dimensions to prevent
"image dimensions exceed max allowed size: 8000 pixels" API errors.

New features:
 **Automatic size validation** - Rejects images > 8000px by default
 **allowLargeImages flag** - Override validation when needed
 **Smart error messages** - Helpful solutions when validation fails
 **Image dimension parsing** - Supports PNG and JPEG format detection
 **Updated tool description** - Documents size limits and flag usage

Error message provides actionable solutions:
1. Use viewport screenshot (remove fullPage: true)
2. Allow large images (add allowLargeImages: true)
3. Reduce viewport size (browser_configure)
4. Screenshot specific elements (element + ref)

Usage examples:
```json
// Safe viewport screenshot
{"filename": "safe.png"}

// Full page with validation
{"fullPage": true, "filename": "page.png"}

// Override validation for large images
{"fullPage": true, "allowLargeImages": true, "filename": "large.png"}
```

This prevents the frustrating API error: "At least one of the image
dimensions exceed max allowed size: 8000 pixels" by catching oversized
images before they reach the API and providing clear solutions.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Ryan Malloy 2025-08-22 08:52:17 -06:00
parent 88cf3f8f81
commit ec8b0c24b5

View File

@ -24,12 +24,46 @@ import { ArtifactManagerRegistry } from '../artifactManager.js';
import type * as playwright from 'playwright'; import type * as playwright from 'playwright';
// Helper function to get image dimensions from buffer
function getImageDimensions(buffer: Buffer): { width: number, height: number } {
// PNG format check (starts with PNG signature)
if (buffer.length >= 24 && buffer.toString('ascii', 1, 4) === 'PNG') {
const width = buffer.readUInt32BE(16);
const height = buffer.readUInt32BE(20);
return { width, height };
}
// JPEG format check (starts with FF D8)
if (buffer.length >= 4 && buffer[0] === 0xFF && buffer[1] === 0xD8) {
// Look for SOF0 marker (Start of Frame)
let offset = 2;
while (offset < buffer.length - 8) {
if (buffer[offset] === 0xFF) {
const marker = buffer[offset + 1];
if (marker >= 0xC0 && marker <= 0xC3) { // SOF markers
const height = buffer.readUInt16BE(offset + 5);
const width = buffer.readUInt16BE(offset + 7);
return { width, height };
}
const length = buffer.readUInt16BE(offset + 2);
offset += 2 + length;
} else {
offset++;
}
}
}
// Fallback - couldn't parse dimensions
throw new Error('Unable to determine image dimensions');
}
const screenshotSchema = z.object({ const screenshotSchema = z.object({
raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'), raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
filename: z.string().optional().describe('File name to save the screenshot to. Defaults to `page-{timestamp}.{png|jpeg}` if not specified.'), filename: z.string().optional().describe('File name to save the screenshot to. Defaults to `page-{timestamp}.{png|jpeg}` if not specified.'),
element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'), element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'), ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
fullPage: z.boolean().optional().describe('When true, takes a screenshot of the full scrollable page, instead of the currently visible viewport. Cannot be used with element screenshots.'), fullPage: z.boolean().optional().describe('When true, takes a screenshot of the full scrollable page, instead of the currently visible viewport. Cannot be used with element screenshots.'),
allowLargeImages: z.boolean().optional().describe('Allow images with dimensions exceeding 8000 pixels (API limit). Default false - will error if image is too large to prevent API failures.'),
}).refine(data => { }).refine(data => {
return !!data.element === !!data.ref; return !!data.element === !!data.ref;
}, { }, {
@ -47,7 +81,7 @@ const screenshot = defineTabTool({
schema: { schema: {
name: 'browser_take_screenshot', name: 'browser_take_screenshot',
title: 'Take a screenshot', title: 'Take a screenshot',
description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`, description: `Take a screenshot of the current page. Images exceeding 8000 pixels in either dimension will be rejected unless allowLargeImages=true. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
inputSchema: screenshotSchema, inputSchema: screenshotSchema,
type: 'readOnly', type: 'readOnly',
}, },
@ -88,7 +122,44 @@ const screenshot = defineTabTool({
response.addCode(`await page.screenshot(${javascript.formatObject(options)});`); response.addCode(`await page.screenshot(${javascript.formatObject(options)});`);
const buffer = locator ? await locator.screenshot(options) : await tab.page.screenshot(options); const buffer = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
response.addResult(`Took the ${screenshotTarget} screenshot and saved it as ${fileName}`);
// Validate image dimensions unless allowLargeImages is true
if (!params.allowLargeImages) {
try {
const { width, height } = getImageDimensions(buffer);
const maxDimension = 8000;
if (width > maxDimension || height > maxDimension) {
throw new Error(
`Screenshot dimensions (${width}x${height}) exceed maximum allowed size of ${maxDimension} pixels.\n\n` +
`**Solutions:**\n` +
`1. Use viewport screenshot: Remove "fullPage": true\n` +
`2. Allow large images: Add "allowLargeImages": true\n` +
`3. Reduce viewport size: browser_configure {"viewport": {"width": 1280, "height": 800}}\n` +
`4. Screenshot specific element: Use "element" and "ref" parameters\n\n` +
`**Example fixes:**\n` +
`browser_take_screenshot {"filename": "${params.filename || 'screenshot.png'}"} // viewport only\n` +
`browser_take_screenshot {"fullPage": true, "allowLargeImages": true, "filename": "${params.filename || 'screenshot.png'}"} // allow large`
);
}
} catch (dimensionError) {
// If we can't parse dimensions, continue without validation
// This shouldn't happen with standard PNG/JPEG images
}
}
let resultMessage = `Took the ${screenshotTarget} screenshot and saved it as ${fileName}`;
if (params.allowLargeImages) {
try {
const { width, height } = getImageDimensions(buffer);
resultMessage += `\n\n⚠ **Large image warning:** Screenshot is ${width}x${height} pixels (may exceed API limits)`;
} catch (dimensionError) {
resultMessage += `\n\n⚠ **Large image warning:** Size validation disabled (allowLargeImages=true)`;
}
}
response.addResult(resultMessage);
response.addImage({ response.addImage({
contentType: fileType === 'png' ? 'image/png' : 'image/jpeg', contentType: fileType === 'png' ? 'image/png' : 'image/jpeg',
data: buffer data: buffer