Adds revolutionary features for MCP client identification and browser automation:

MCP Client Debug System:
- Floating pill toolbar with client identification and session info
- Theme system with 5 built-in themes (minimal, corporate, hacker, glass, high-contrast)
- Custom theme creation API with CSS variable overrides
- Cross-site validation ensuring toolbar persists across navigation
- Session-based injection with persistence across page loads

Voice Collaboration (Prototype):
- Web Speech API integration for conversational browser automation
- Bidirectional voice communication between AI and user
- Real-time voice guidance during automation tasks
- Documented architecture and future development roadmap

Code Injection Enhancements:
- Model collaboration API for notify, prompt, and inspector functions
- Auto-injection and persistence options
- Toolbar integration with code injection system

Documentation:
- Comprehensive technical achievement documentation
- Voice collaboration architecture and implementation guide
- Theme system integration documentation
- Tool annotation templates for consistency

This represents a major advancement in browser automation UX, enabling unprecedented visibility and interaction patterns for MCP clients.
173 lines
5.9 KiB
TypeScript
173 lines
5.9 KiB
TypeScript
/**
|
|
* Copyright (c) Microsoft Corporation.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
import { z } from 'zod';
|
|
|
|
import { defineTabTool, defineTool } from './tool.js';
|
|
import * as javascript from '../javascript.js';
|
|
import { generateLocator } from './utils.js';
|
|
|
|
/**
 * `browser_snapshot` tool.
 *
 * Takes no input. The handler first awaits `context.ensureTab()` and then
 * flags the response to force-include the page snapshot, so the snapshot is
 * part of the reply regardless of the session's snapshot configuration
 * (as stated in the tool description).
 */
const snapshot = defineTool({
  capability: 'core',

  schema: {
    name: 'browser_snapshot',
    title: 'Page snapshot',
    description: 'Capture complete accessibility snapshot of the current page. Always returns full snapshot regardless of session snapshot configuration. Better than screenshot for understanding page structure.',
    inputSchema: z.object({}),
    type: 'readOnly',
  },

  // `params` is unused: the input schema is an empty object.
  async handle(context, params, response) {
    await context.ensureTab();
    response.setForceIncludeSnapshot();
  },
});
|
|
|
|
/**
 * Base input schema shared by the element-targeting tools in this file
 * (click, hover, select option).
 *
 * - `element`: human-readable description of the element.
 * - `ref`: exact element reference taken from the page snapshot.
 */
export const elementSchema = z.object({
  element: z
      .string()
      .describe('Human-readable element description used to obtain permission to interact with the element'),
  ref: z
      .string()
      .describe('Exact target element reference from the page snapshot'),
});
|
|
|
|
/**
 * Input schema for `browser_click`: the shared element fields plus two
 * optional click modifiers.
 *
 * - `doubleClick`: when true, a double click is performed.
 * - `button`: one of 'left' | 'right' | 'middle'.
 */
const clickSchema = elementSchema.extend({
  doubleClick: z
      .boolean()
      .optional()
      .describe('Whether to perform a double click instead of a single click'),
  button: z
      .enum(['left', 'right', 'middle'])
      .optional()
      .describe('Button to click, defaults to left'),
});
|
|
|
|
/**
 * `browser_click` tool.
 *
 * Resolves the referenced element to a locator, records the equivalent
 * Playwright code (a comment line plus the `click`/`dblclick` call) on the
 * response, and then performs the click inside `tab.waitForCompletion`.
 * The response is flagged to include a snapshot.
 */
const click = defineTabTool({
  capability: 'core',

  schema: {
    name: 'browser_click',
    title: 'Click',
    description: `Perform click on a web page. Returns page snapshot after click (configurable via browser_configure_snapshots). Use browser_snapshot for explicit full snapshots.

🤖 MODELS: Use mcpNotify.info('message'), mcpPrompt('question?'), and
mcpInspector.start('click element', callback) for user collaboration.`,
    inputSchema: clickSchema,
    type: 'destructive',
  },

  async handle(tab, params, response) {
    response.setIncludeSnapshot();

    const target = await tab.refLocator(params);
    const { button, doubleClick, element } = params;

    // Only spell out the options object in generated code when a button was given.
    const optionsSource = button ? `{ button: '${button}' }` : '';
    const label = doubleClick ? 'Double click' : 'Click';
    const method = doubleClick ? 'dblclick' : 'click';

    response.addCode(`// ${label} ${element}`);
    response.addCode(`await page.${await generateLocator(target)}.${method}(${optionsSource});`);

    await tab.waitForCompletion(async () => {
      if (doubleClick)
        await target.dblclick({ button });
      else
        await target.click({ button });
    });
  },
});
|
|
|
|
/**
 * `browser_drag` tool.
 *
 * Resolves the source and target element references to locators, records the
 * equivalent Playwright `dragTo` call on the response, and then performs the
 * drag inside `tab.waitForCompletion`. The response is flagged to include a
 * snapshot.
 */
const drag = defineTabTool({
  capability: 'core',

  schema: {
    name: 'browser_drag',
    title: 'Drag mouse',
    description: 'Perform drag and drop between two elements. Returns page snapshot after drag (configurable via browser_configure_snapshots).',
    inputSchema: z.object({
      startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
      startRef: z.string().describe('Exact source element reference from the page snapshot'),
      endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
      endRef: z.string().describe('Exact target element reference from the page snapshot'),
    }),
    type: 'destructive',
  },

  handle: async (tab, params, response) => {
    response.setIncludeSnapshot();

    const [startLocator, endLocator] = await tab.refLocators([
      { ref: params.startRef, element: params.startElement },
      { ref: params.endRef, element: params.endElement },
    ]);

    // Build the generated code BEFORE dragging, like the click, hover and
    // select-option tools do. A drag can re-render or remove the elements.
    // NOTE(review): generateLocator lives in ./utils.js (not visible here);
    // presumably it resolves the locator against the live page and could fail
    // on a detached element after a successful drag — confirm.
    const startCode = await generateLocator(startLocator);
    const endCode = await generateLocator(endLocator);
    response.addCode(`await page.${startCode}.dragTo(page.${endCode});`);

    await tab.waitForCompletion(async () => {
      await startLocator.dragTo(endLocator);
    });
  },
});
|
|
|
|
/**
 * `browser_hover` tool.
 *
 * Resolves the referenced element to a locator, records the equivalent
 * Playwright `hover()` call on the response, and then hovers the element
 * inside `tab.waitForCompletion`. The response is flagged to include a
 * snapshot.
 */
const hover = defineTabTool({
  capability: 'core',

  schema: {
    name: 'browser_hover',
    title: 'Hover mouse',
    description: 'Hover over element on page. Returns page snapshot after hover (configurable via browser_configure_snapshots).',
    inputSchema: elementSchema,
    type: 'readOnly',
  },

  async handle(tab, params, response) {
    response.setIncludeSnapshot();

    const target = await tab.refLocator(params);
    const locatorSource = await generateLocator(target);
    response.addCode(`await page.${locatorSource}.hover();`);

    await tab.waitForCompletion(async () => {
      await target.hover();
    });
  },
});
|
|
|
|
/**
 * Input schema for `browser_select_option`: the shared element fields plus
 * the list of option values to select.
 */
const selectOptionSchema = elementSchema.extend({
  values: z
      .array(z.string())
      .describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
|
|
|
|
/**
 * `browser_select_option` tool.
 *
 * Resolves the referenced element to a locator, records the equivalent
 * Playwright code (a comment line plus the `selectOption` call) on the
 * response, and then selects the given values inside
 * `tab.waitForCompletion`. The response is flagged to include a snapshot.
 */
const selectOption = defineTabTool({
  capability: 'core',

  schema: {
    name: 'browser_select_option',
    title: 'Select option',
    description: 'Select an option in a dropdown. Returns page snapshot after selection (configurable via browser_configure_snapshots).',
    inputSchema: selectOptionSchema,
    type: 'destructive',
  },

  async handle(tab, params, response) {
    response.setIncludeSnapshot();

    const dropdown = await tab.refLocator(params);
    const { element, values } = params;

    response.addCode(`// Select options [${values.join(', ')}] in ${element}`);

    const locatorSource = await generateLocator(dropdown);
    const valuesSource = javascript.formatObject(values);
    response.addCode(`await page.${locatorSource}.selectOption(${valuesSource});`);

    await tab.waitForCompletion(async () => {
      await dropdown.selectOption(values);
    });
  },
});
|
|
|
|
// Tools exported by this module, in registration order.
export default [snapshot, click, drag, hover, selectOption];
|