Compare commits

..

No commits in common. "5b7a1e0452f5c84d1910317ffae917f1c36651e7" and "da8a244f33cedf80865b4df9937f0b2d5952eb93" have entirely different histories.

36 changed files with 669 additions and 1378 deletions

View File

@ -1,72 +0,0 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
This is the Playwright MCP (Model Context Protocol) server - a TypeScript/Node.js project that provides browser automation capabilities through structured accessibility snapshots. It enables LLMs to interact with web pages without requiring screenshots or vision models.
## Development Commands
**Build:**
- `npm run build` - Build TypeScript to JavaScript in `lib/` directory
- `npm run build:extension` - Build browser extension in `extension/lib/`
- `npm run watch` - Watch mode for main build
- `npm run watch:extension` - Watch mode for extension build
**Testing:**
- `npm test` - Run all Playwright tests
- `npm run ctest` - Run Chrome-specific tests only
- `npm run ftest` - Run Firefox-specific tests only
- `npm run wtest` - Run WebKit-specific tests only
**Linting & Quality:**
- `npm run lint` - Run linter and type checking (includes README update)
- `npm run lint-fix` - Auto-fix linting issues
- `npm run update-readme` - Update README with generated tool documentation
**Development:**
- `npm run clean` - Remove built files from `lib/` and `extension/lib/`
## Architecture
**Core Components:**
- `src/index.ts` - Main entry point providing `createConnection()` API
- `src/server.ts` - MCP server implementation with connection management
- `src/connection.ts` - Creates MCP server with tool handlers and request processing
- `src/tools.ts` - Aggregates all available tools from `src/tools/` directory
- `src/context.ts` - Browser context management and state handling
- `src/browserContextFactory.ts` - Factory for creating browser contexts with different configurations
**Tool System:**
- All browser automation tools are in `src/tools/` directory
- Each tool file exports an array of tool definitions
- Tools are categorized by capability: `core`, `tabs`, `install`, `pdf`, `vision`
- Tool capabilities are filtered based on config to enable/disable features
**Browser Management:**
- Supports multiple browsers: Chrome, Firefox, WebKit, Edge
- Two modes: persistent profile (default) or isolated contexts
- Browser contexts are created through factory pattern for flexibility
- CDP (Chrome DevTools Protocol) support for remote browser connections
**Configuration:**
- `src/config.ts` - Configuration resolution and validation
- Supports both CLI arguments and JSON config files
- Browser launch options, context options, network settings, capabilities
**Transport:**
- Supports both STDIO and HTTP/SSE transports
- STDIO for direct MCP client connections
- HTTP mode for standalone server operation
## Key Files
- `cli.js` - CLI entry point (imports `lib/program.js`)
- `src/program.ts` - Command-line argument parsing and server setup
- `playwright.config.ts` - Test configuration for multiple browser projects
- `tests/fixtures.ts` - Custom Playwright test fixtures for MCP testing
## Extension
The `extension/` directory contains a browser extension for CDP relay functionality, built separately with its own TypeScript config.

View File

@ -511,14 +511,6 @@ http.createServer(async (req, res) => {
<!-- NOTE: This has been generated via update-readme.js -->
- **browser_recording_status**
- Title: Get video recording status
- Description: Check if video recording is currently enabled and get recording details. Use this to verify recording is active before performing actions, or to check output directory and settings.
- Parameters: None
- Read-only: **true**
<!-- NOTE: This has been generated via update-readme.js -->
- **browser_resize**
- Title: Resize browser window
- Description: Resize the browser window
@ -548,24 +540,6 @@ http.createServer(async (req, res) => {
<!-- NOTE: This has been generated via update-readme.js -->
- **browser_start_recording**
- Title: Start video recording
- Description: Start recording browser session video. This must be called BEFORE performing browser actions you want to record. New browser contexts will be created with video recording enabled. Videos are automatically saved when pages/contexts close.
- Parameters:
- `size` (object, optional): Video recording size
- `filename` (string, optional): Base filename for video files (default: session-{timestamp}.webm)
- Read-only: **false**
<!-- NOTE: This has been generated via update-readme.js -->
- **browser_stop_recording**
- Title: Stop video recording
- Description: Stop video recording and return the paths to recorded video files. This closes all active pages to ensure videos are properly saved. Call this when you want to finalize and access the recorded videos.
- Parameters: None
- Read-only: **true**
<!-- NOTE: This has been generated via update-readme.js -->
- **browser_take_screenshot**
- Title: Take a screenshot
- Description: Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.

View File

@ -19,16 +19,14 @@
<title>Playwright MCP extension</title>
</head>
<body>
<div class="header">
<h3>Playwright MCP extension</h3>
</div>
<div id="status-container"></div>
<div class="button-row">
<button id="continue-btn">Continue</button>
<button id="reject-btn">Reject</button>
</div>
<div id="tab-list-container">
<h4>Select page to expose to MCP server:</h4>
<div id="tab-list"></div>
</div>
<script src="lib/connect.js"></script>
</body>
</html>

View File

@ -19,10 +19,6 @@ import { RelayConnection, debugLog } from './relayConnection.js';
// Messages received by the extension's runtime message handler (`_onMessage`).
type PageMessage = {
// Asks the extension to connect a tab to the MCP relay WebSocket at `mcpRelayUrl`.
type: 'connectToMCPRelay';
mcpRelayUrl: string;
// Tab to connect; it is activated once the connection is established.
tabId: number;
// Window that owns the tab; it is focused once the connection is established.
windowId: number;
} | {
// Asks the extension for the list of open tabs.
type: 'getTabs';
};
class TabShareExtension {
@ -39,22 +35,22 @@ class TabShareExtension {
private _onMessage(message: PageMessage, sender: chrome.runtime.MessageSender, sendResponse: (response: any) => void) {
switch (message.type) {
case 'connectToMCPRelay':
this._connectTab(message.tabId, message.windowId, message.mcpRelayUrl!).then(
const tabId = sender.tab?.id;
if (!tabId) {
sendResponse({ success: false, error: 'No tab id' });
return true;
}
this._connectTab(sender.tab!, message.mcpRelayUrl!).then(
() => sendResponse({ success: true }),
(error: any) => sendResponse({ success: false, error: error.message }));
return true; // Return true to indicate that the response will be sent asynchronously
case 'getTabs':
this._getTabs().then(
tabs => sendResponse({ success: true, tabs, currentTabId: sender.tab?.id }),
(error: any) => sendResponse({ success: false, error: error.message }));
return true;
}
return false;
}
private async _connectTab(tabId: number, windowId: number, mcpRelayUrl: string): Promise<void> {
private async _connectTab(tab: chrome.tabs.Tab, mcpRelayUrl: string): Promise<void> {
try {
debugLog(`Connecting tab ${tabId} to bridge at ${mcpRelayUrl}`);
debugLog(`Connecting tab ${tab.id} to bridge at ${mcpRelayUrl}`);
const socket = new WebSocket(mcpRelayUrl);
await new Promise<void>((resolve, reject) => {
socket.onopen = () => resolve();
@ -62,7 +58,7 @@ class TabShareExtension {
setTimeout(() => reject(new Error('Connection timeout')), 5000);
});
const connection = new RelayConnection(socket, tabId);
const connection = new RelayConnection(socket, tab.id!);
const connectionClosed = (m: string) => {
debugLog(m);
if (this._activeConnection === connection) {
@ -75,14 +71,14 @@ class TabShareExtension {
this._activeConnection = connection;
await Promise.all([
this._setConnectedTabId(tabId),
chrome.tabs.update(tabId, { active: true }),
chrome.windows.update(windowId, { focused: true }),
this._setConnectedTabId(tab.id!),
chrome.tabs.update(tab.id!, { active: true }),
chrome.windows.update(tab.windowId, { focused: true }),
]);
debugLog(`Connected to MCP bridge`);
} catch (error: any) {
await this._setConnectedTabId(null);
debugLog(`Failed to connect tab ${tabId}:`, error.message);
debugLog(`Failed to connect tab ${tab.id}:`, error.message);
throw error;
}
}
@ -114,11 +110,6 @@ class TabShareExtension {
if (changeInfo.status === 'complete' && this._connectedTabId === tabId)
await this._setConnectedTabId(tabId);
}
/**
 * Returns the tabs reported by `chrome.tabs.query` when no filter is applied.
 */
private async _getTabs(): Promise<chrome.tabs.Tab[]> {
  return await chrome.tabs.query({});
}
}
new TabShareExtension();

View File

@ -14,39 +14,25 @@
* limitations under the License.
*/
// Shape of a browser tab as rendered in the tab picker of the connect page.
interface TabInfo {
// Tab id; sent to the background script as `tabId` when connecting.
id: number;
// Id of the owning window; sent to the background script as `windowId` when connecting.
windowId: number;
// Tab title; the picker falls back to 'Untitled' when it is empty.
title: string;
// Tab URL; tabs whose URL starts with 'chrome://' are shown disabled.
url: string;
// Optional favicon URL; used as the icon image source when present.
favIconUrl?: string;
}
class ConnectPage {
private _tabList: HTMLElement;
private _tabListContainer: HTMLElement;
private _statusContainer: HTMLElement;
private _selectedTab: TabInfo | undefined;
constructor() {
this._tabList = document.getElementById('tab-list')!;
this._tabListContainer = document.getElementById('tab-list-container')!;
this._statusContainer = document.getElementById('status-container') as HTMLElement;
this._addButtonHandlers();
void this._loadTabs();
}
private _addButtonHandlers() {
document.addEventListener('DOMContentLoaded', async () => {
const statusContainer = document.getElementById('status-container') as HTMLElement;
const continueBtn = document.getElementById('continue-btn') as HTMLButtonElement;
const rejectBtn = document.getElementById('reject-btn') as HTMLButtonElement;
const buttonRow = document.querySelector('.button-row') as HTMLElement;
function showStatus(type: 'connected' | 'error' | 'connecting', message: string) {
const div = document.createElement('div');
div.className = `status ${type}`;
div.textContent = message;
statusContainer.replaceChildren(div);
}
const params = new URLSearchParams(window.location.search);
const mcpRelayUrl = params.get('mcpRelayUrl');
if (!mcpRelayUrl) {
buttonRow.style.display = 'none';
this._showStatus('error', 'Missing mcpRelayUrl parameter in URL.');
showStatus('error', 'Missing mcpRelayUrl parameter in URL.');
return;
}
@ -55,117 +41,30 @@ class ConnectPage {
const client = JSON.parse(params.get('client') || '{}');
clientInfo = `${client.name}/${client.version}`;
} catch (e) {
this._showStatus('error', 'Failed to parse client version.');
showStatus('error', 'Failed to parse client version.');
return;
}
this._showStatus('connecting', `MCP client "${clientInfo}" is trying to connect. Do you want to continue?`);
showStatus('connecting', `MCP client "${clientInfo}" is trying to connect. Do you want to continue?`);
rejectBtn.addEventListener('click', async () => {
buttonRow.style.display = 'none';
this._tabListContainer.style.display = 'none';
this._showStatus('error', 'Connection rejected. This tab can be closed.');
showStatus('error', 'Connection rejected. This tab can be closed.');
});
continueBtn.addEventListener('click', async () => {
buttonRow.style.display = 'none';
try {
const selectedTab = this._selectedTab;
if (!selectedTab) {
this._showStatus('error', 'Tab not selected.');
return;
}
const response = await chrome.runtime.sendMessage({
type: 'connectToMCPRelay',
mcpRelayUrl,
tabId: selectedTab.id,
windowId: selectedTab.windowId,
mcpRelayUrl
});
if (response?.success)
this._showStatus('connected', `MCP client "${clientInfo}" connected.`);
showStatus('connected', `MCP client "${clientInfo}" connected.`);
else
this._showStatus('error', response?.error || `MCP client "${clientInfo}" failed to connect.`);
showStatus('error', response?.error || `MCP client "${clientInfo}" failed to connect.`);
} catch (e) {
this._showStatus('error', `MCP client "${clientInfo}" failed to connect: ${e}`);
showStatus('error', `MCP client "${clientInfo}" failed to connect: ${e}`);
}
});
}
/**
 * Requests the tab list from the background script and renders it.
 * Any failure (an unsuccessful reply or a messaging error) is reported
 * through the status area instead of being thrown.
 */
private async _loadTabs(): Promise<void> {
  try {
    const reply = await chrome.runtime.sendMessage({ type: 'getTabs' });
    if (!reply.success) {
      this._showStatus('error', 'Failed to load tabs: ' + reply.error);
      return;
    }
    this._populateTabList(reply.tabs, reply.currentTabId);
  } catch (failure) {
    this._showStatus('error', 'Failed to communicate with background script: ' + failure);
  }
}
/**
 * Rebuilds the tab picker from scratch.
 *
 * @param tabs Tabs to list, in display order.
 * @param currentTabId Id of the tab that should start out selected.
 */
private _populateTabList(tabs: TabInfo[], currentTabId: number): void {
  // Drop whatever was rendered previously.
  this._tabList.replaceChildren();
  // The selection must be known before rows are built: each row reads it
  // to decide whether its radio button starts checked.
  this._selectedTab = tabs.find(candidate => candidate.id === currentTabId);
  for (const tab of tabs)
    this._tabList.appendChild(this._createTabElement(tab));
}
/**
 * Builds one row of the tab picker: radio button, favicon, title and URL.
 *
 * @param tab Tab described by the row.
 * @returns The detached row element; the caller inserts it into the list.
 */
private _createTabElement(tab: TabInfo): HTMLElement {
  // chrome:// pages are rendered dimmed and with a disabled radio button.
  const isInternalPage = tab.url.startsWith('chrome://');

  const row = document.createElement('div');
  row.className = 'tab-info';
  row.style.padding = '5px';

  const selector = document.createElement('input');
  selector.type = 'radio';
  selector.name = 'tab-selection';
  selector.checked = tab.id === this._selectedTab?.id;
  selector.id = `tab-${tab.id}`;
  selector.addEventListener('change', () => {
    if (selector.checked)
      this._selectedTab = tab;
  });

  if (isInternalPage) {
    row.style.opacity = '0.5';
    selector.disabled = true;
  }

  const icon = document.createElement('img');
  icon.className = 'tab-favicon';
  if (tab.favIconUrl)
    icon.src = tab.favIconUrl;
  icon.alt = '';
  icon.style.height = '16px';
  icon.style.width = '16px';

  // Title and URL share the same markup apart from class name and text.
  const makeLabel = (className: string, text: string): HTMLSpanElement => {
    const label = document.createElement('span');
    label.style.paddingLeft = '5px';
    label.className = className;
    label.textContent = text;
    return label;
  };

  row.append(
      selector,
      icon,
      makeLabel('tab-title', tab.title || 'Untitled'),
      makeLabel('tab-url', tab.url));
  return row;
}
/**
 * Replaces the content of the status container with a single message.
 *
 * @param type Status flavour; emitted as a CSS class next to `status`.
 * @param message Text to display (set as text content, not HTML).
 */
private _showStatus(type: 'connected' | 'error' | 'connecting', message: string) {
  const banner = Object.assign(document.createElement('div'), {
    className: `status ${type}`,
    textContent: message,
  });
  this._statusContainer.replaceChildren(banner);
}
}
new ConnectPage();
});

3
package-lock.json generated
View File

@ -12,7 +12,6 @@
"@modelcontextprotocol/sdk": "^1.16.0",
"commander": "^13.1.0",
"debug": "^4.4.1",
"dotenv": "^17.2.0",
"mime": "^4.0.7",
"playwright": "1.55.0-alpha-1752701791000",
"playwright-core": "1.55.0-alpha-1752701791000",
@ -35,6 +34,7 @@
"@typescript-eslint/eslint-plugin": "^8.26.1",
"@typescript-eslint/parser": "^8.26.1",
"@typescript-eslint/utils": "^8.26.1",
"dotenv": "^17.2.0",
"eslint": "^9.19.0",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-notice": "^1.0.0",
@ -1289,6 +1289,7 @@
"version": "17.2.0",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.0.tgz",
"integrity": "sha512-Q4sgBT60gzd0BB0lSyYD3xM4YxrXA9y4uBDof1JNYGzOXrQdQ6yX+7XIAqoFOGQFOTK1D3Hts5OllpxMDZFONQ==",
"dev": true,
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"

View File

@ -42,7 +42,6 @@
"@modelcontextprotocol/sdk": "^1.16.0",
"commander": "^13.1.0",
"debug": "^4.4.1",
"dotenv": "^17.2.0",
"mime": "^4.0.7",
"playwright": "1.55.0-alpha-1752701791000",
"playwright-core": "1.55.0-alpha-1752701791000",
@ -62,6 +61,7 @@
"@typescript-eslint/eslint-plugin": "^8.26.1",
"@typescript-eslint/parser": "^8.26.1",
"@typescript-eslint/utils": "^8.26.1",
"dotenv": "^17.2.0",
"eslint": "^9.19.0",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-notice": "^1.0.0",

View File

@ -217,7 +217,7 @@ async function injectCdpPort(browserConfig: FullConfig['browser']) {
(browserConfig.launchOptions as any).cdpPort = await findFreePort();
}
async function findFreePort(): Promise<number> {
async function findFreePort() {
return new Promise((resolve, reject) => {
const server = net.createServer();
server.listen(0, () => {

View File

@ -1,66 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { FullConfig } from './config.js';
import { Context } from './context.js';
import { logUnhandledError } from './log.js';
import { Response } from './response.js';
import { SessionLog } from './sessionLog.js';
import { filteredTools } from './tools.js';
import { packageJSON } from './package.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type * as mcpServer from './mcp/server.js';
import type { ServerBackend } from './mcp/server.js';
import type { Tool } from './tools/tool.js';
/**
 * `ServerBackend` implementation that exposes the config-filtered tool set
 * and runs tool calls against a single `Context`.
 */
export class BrowserServerBackend implements ServerBackend {
  name = 'Playwright';
  version = packageJSON.version;

  private _tools: Tool[];
  private _context: Context;
  private _sessionLog: SessionLog | undefined;

  /**
   * @param config Resolved configuration; also decides which tools are enabled.
   * @param browserContextFactory Factory handed to the `Context`.
   */
  constructor(config: FullConfig, browserContextFactory: BrowserContextFactory) {
    const enabledTools = filteredTools(config);
    this._tools = enabledTools;
    this._context = new Context(enabledTools, config, browserContextFactory);
  }

  /** Creates the session log when `saveSession` is set in the config. */
  async initialize() {
    const { config } = this._context;
    if (!config.saveSession) {
      this._sessionLog = undefined;
      return;
    }
    this._sessionLog = await SessionLog.create(config);
  }

  /** Returns the schemas of all enabled tools. */
  tools(): mcpServer.ToolSchema<any>[] {
    return this._tools.map(({ schema }) => schema);
  }

  /**
   * Runs the tool whose schema name matches `schema.name`, records the
   * response in the session log (if any) and returns the serialized response.
   *
   * @param schema Schema of the tool to invoke.
   * @param parsedArguments Arguments passed through to the tool handler.
   */
  async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any) {
    const toolName = schema.name;
    const response = new Response(this._context, toolName, parsedArguments);
    // The lookup result is asserted non-null: no fallback exists here for an unknown name.
    const matchingTool = this._tools.find(candidate => candidate.schema.name === toolName)!;
    await matchingTool.handle(this._context, parsedArguments, response);
    if (this._sessionLog)
      await this._sessionLog.log(response);
    return await response.serialize();
  }

  /** Stores the client version on the context. */
  serverInitialized(version: mcpServer.ClientVersion | undefined) {
    this._context.clientVersion = version;
  }

  /** Disposes the context in the background; failures are only logged. */
  serverClosed() {
    void this._context.dispose().catch(logUnhandledError);
  }
}

View File

@ -129,10 +129,8 @@ export function configFromCLIOptions(cliOptions: CLIOptions): Config {
const launchOptions: LaunchOptions = {
channel,
executablePath: cliOptions.executablePath,
headless: cliOptions.headless,
};
if (cliOptions.headless !== undefined) {
launchOptions.headless = cliOptions.headless;
}
// --no-sandbox was passed, disable the sandbox
if (cliOptions.sandbox === false)

84
src/connection.ts Normal file
View File

@ -0,0 +1,84 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema, Tool as McpTool } from '@modelcontextprotocol/sdk/types.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { Context } from './context.js';
import { Response } from './response.js';
import { packageJSON } from './package.js';
import { FullConfig } from './config.js';
import { SessionLog } from './sessionLog.js';
import { logUnhandledError } from './log.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type { Tool } from './tools/tool.js';
/**
 * Creates an MCP `Server` named 'Playwright' that lists and executes the
 * given tools against a freshly created `Context`.
 *
 * @param config Resolved configuration; `saveSession` enables session logging.
 * @param tools Tools to expose through the server.
 * @param browserContextFactory Factory handed to the `Context`.
 * @returns The configured server (not yet connected to a transport).
 */
export async function createMCPServer(config: FullConfig, tools: Tool<any>[], browserContextFactory: BrowserContextFactory): Promise<Server> {
  const context = new Context(tools, config, browserContextFactory);
  const server = new Server(
      { name: 'Playwright', version: packageJSON.version },
      { capabilities: { tools: {} } });

  const sessionLog = config.saveSession ? await SessionLog.create(config) : undefined;

  // Converts an internal tool definition into its MCP wire description.
  const describeTool = ({ schema }: Tool<any>) => ({
    name: schema.name,
    description: schema.description,
    inputSchema: zodToJsonSchema(schema.inputSchema),
    annotations: {
      title: schema.title,
      readOnlyHint: schema.type === 'readOnly',
      destructiveHint: schema.type === 'destructive',
      openWorldHint: true,
    },
  });

  // Tool failures are returned to the client as error results, not thrown.
  const errorResult = (...messages: string[]) => ({
    content: [{ type: 'text', text: messages.join('\n') }],
    isError: true,
  });

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    const descriptions = tools.map(tool => describeTool(tool));
    return { tools: descriptions as McpTool[] };
  });

  server.setRequestHandler(CallToolRequestSchema, async request => {
    const { name: toolName, arguments: rawArguments } = request.params;
    const tool = tools.find(candidate => candidate.schema.name === toolName);
    if (!tool)
      return errorResult(`Tool "${toolName}" not found`);

    try {
      const response = new Response(context, toolName, rawArguments || {});
      // Argument validation happens inside the try block, so schema
      // violations are reported as error results too.
      const parsedArguments = tool.schema.inputSchema.parse(rawArguments || {});
      await tool.handle(context, parsedArguments, response);
      if (sessionLog)
        await sessionLog.log(response);
      return await response.serialize();
    } catch (error) {
      return errorResult(String(error));
    }
  });

  server.oninitialized = () => {
    context.clientVersion = server.getClientVersion();
  };

  server.onclose = () => {
    // Dispose in the background; failures are only logged.
    void context.dispose().catch(logUnhandledError);
  };

  return server;
}

View File

@ -17,9 +17,7 @@
import debug from 'debug';
import * as playwright from 'playwright';
import { logUnhandledError } from './log.js';
import { Tab } from './tab.js';
import { EnvironmentIntrospector } from './environmentIntrospection.js';
import type { Tool } from './tools/tool.js';
import type { FullConfig } from './config.js';
@ -35,29 +33,15 @@ export class Context {
private _tabs: Tab[] = [];
private _currentTab: Tab | undefined;
clientVersion: { name: string; version: string; } | undefined;
private _videoRecordingConfig: { dir: string; size?: { width: number; height: number } } | undefined;
private _videoBaseFilename: string | undefined;
private _activePagesWithVideos: Set<playwright.Page> = new Set();
private _environmentIntrospector: EnvironmentIntrospector;
private static _allContexts: Set<Context> = new Set();
private _closeBrowserContextPromise: Promise<void> | undefined;
// Session isolation properties
readonly sessionId: string;
private _sessionStartTime: number;
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory, environmentIntrospector?: EnvironmentIntrospector) {
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory) {
this.tools = tools;
this.config = config;
this._browserContextFactory = browserContextFactory;
this._environmentIntrospector = environmentIntrospector || new EnvironmentIntrospector();
// Generate unique session ID
this._sessionStartTime = Date.now();
this.sessionId = this._generateSessionId();
testDebug(`create context with sessionId: ${this.sessionId}`);
testDebug('create context');
Context._allContexts.add(this);
}
@ -65,28 +49,6 @@ export class Context {
await Promise.all([...Context._allContexts].map(context => context.dispose()));
}
/**
 * Builds a session id of the form `<startTime>-<random>`, prefixed with
 * `<clientName>-<clientVersion>-` when `clientVersion` is set.
 *
 * Every call draws a new random suffix, so two calls never return the
 * same id even though the start time is fixed.
 *
 * @returns The generated session id.
 */
private _generateSessionId(): string {
  // Up to 9 base-36 characters taken after the leading "0." of the random
  // number. `slice(2, 11)` is the non-deprecated equivalent of `substr(2, 9)`.
  const randomSuffix = Math.random().toString(36).slice(2, 11);
  const baseId = `${this._sessionStartTime}-${randomSuffix}`;

  if (!this.clientVersion)
    return baseId;

  // Missing client fields are replaced by the literal 'unknown'.
  const clientInfo = `${this.clientVersion.name || 'unknown'}-${this.clientVersion.version || 'unknown'}`;
  return `${clientInfo}-${baseId}`;
}
/**
 * Regenerates `sessionId` once `clientVersion` is set, so that the id
 * carries the client name and version. Does nothing when no client
 * version is available.
 */
updateSessionIdWithClientInfo() {
  if (!this.clientVersion)
    return;

  const refreshedId = this._generateSessionId();
  testDebug(`updating sessionId from ${this.sessionId} to ${refreshedId}`);
  // `sessionId` is declared readonly; the cast is what allows this late update.
  (this as any).sessionId = refreshedId;
}
tabs(): Tab[] {
return this._tabs;
}
@ -162,11 +124,6 @@ export class Context {
this._tabs.push(tab);
if (!this._currentTab)
this._currentTab = tab;
// Track pages with video recording
if (this._videoRecordingConfig && page.video())
this._activePagesWithVideos.add(page);
}
private _onPageClosed(tab: Tab) {
@ -183,7 +140,7 @@ export class Context {
async closeBrowserContext() {
if (!this._closeBrowserContextPromise)
this._closeBrowserContextPromise = this._closeBrowserContextImpl().catch(logUnhandledError);
this._closeBrowserContextPromise = this._closeBrowserContextImpl();
await this._closeBrowserContextPromise;
this._closeBrowserContextPromise = undefined;
}
@ -236,16 +193,8 @@ export class Context {
private async _setupBrowserContext(): Promise<{ browserContext: playwright.BrowserContext, close: () => Promise<void> }> {
if (this._closeBrowserContextPromise)
throw new Error('Another browser context is being closed.');
let result: { browserContext: playwright.BrowserContext, close: () => Promise<void> };
if (this._videoRecordingConfig) {
// Create a new browser context with video recording enabled
result = await this._createVideoEnabledContext();
} else {
// Use the standard browser context factory
result = await this._browserContextFactory.createContext(this.clientVersion!);
}
// TODO: move to the browser context factory to make it based on isolation mode.
const result = await this._browserContextFactory.createContext(this.clientVersion!);
const { browserContext } = result;
await this._setupRequestInterception(browserContext);
for (const page of browserContext.pages())
@ -261,141 +210,4 @@ export class Context {
}
return result;
}
/**
 * Launches a dedicated browser and creates a context with video recording
 * enabled.
 *
 * Launch options are the configured ones overlaid with the options
 * recommended by the environment introspector. When the introspector
 * recommends a `recordVideo` directory, it overrides the directory of the
 * stored recording config.
 *
 * @returns The new context plus a `close` callback that closes both the
 *     context and the browser launched for it.
 * @throws Rethrows any launch/context-creation error; the browser launched
 *     here is closed first so that it does not leak.
 */
private async _createVideoEnabledContext(): Promise<{ browserContext: playwright.BrowserContext, close: () => Promise<void> }> {
  const browserType = playwright[this.config.browser.browserName];
  const envOptions = this._environmentIntrospector.getRecommendedBrowserOptions();

  const browser = await browserType.launch({
    ...this.config.browser.launchOptions,
    ...envOptions, // Environment-detected options win over configured ones.
    handleSIGINT: false,
    handleSIGTERM: false,
  });

  try {
    const videoConfig = envOptions.recordVideo ?
      { ...this._videoRecordingConfig, dir: envOptions.recordVideo.dir } :
      this._videoRecordingConfig;

    const browserContext = await browser.newContext({
      ...this.config.browser.contextOptions,
      recordVideo: videoConfig,
      // Recording always starts from a fresh storage state.
      storageState: undefined,
    });

    return {
      browserContext,
      close: async () => {
        try {
          await browserContext.close();
        } finally {
          // Close the browser even when closing the context failed.
          await browser.close();
        }
      }
    };
  } catch (error) {
    // Context creation failed: do not leave the launched browser running.
    // A secondary close failure is only logged so the original error surfaces.
    await browser.close().catch(logUnhandledError);
    throw error;
  }
}
/**
 * Stores the video recording settings and, when a browser context already
 * exists, starts closing it so that the next context is created with
 * recording enabled.
 *
 * NOTE(review): the close is started but not awaited, so a context request
 * issued immediately afterwards may still observe the close in progress.
 *
 * @param config Recording directory and optional frame size.
 * @param baseFilename Base name stored for the recorded video files.
 */
setVideoRecording(config: { dir: string; size?: { width: number; height: number } }, baseFilename: string) {
  this._videoRecordingConfig = config;
  this._videoBaseFilename = baseFilename;

  if (!this._browserContextPromise)
    return;

  // Fire-and-forget: the context is recreated lazily on next use. Attach a
  // rejection handler so a failed close cannot become an unhandled rejection.
  void this.closeBrowserContext().catch(logUnhandledError);
}
/**
 * Reports the current video recording state.
 *
 * @returns Whether a recording config is set, the config and base filename
 *     themselves, and the number of pages currently tracked as recording.
 */
getVideoRecordingInfo() {
  const recordingConfig = this._videoRecordingConfig;
  const trackedPages = this._activePagesWithVideos;
  return {
    enabled: Boolean(recordingConfig),
    config: recordingConfig,
    baseFilename: this._videoBaseFilename,
    activeRecordings: trackedPages.size,
  };
}
/**
 * Forwards new roots to the environment introspector and logs the
 * resulting environment summary.
 *
 * An existing browser context is left untouched; only a debug note is
 * logged in that case.
 *
 * @param roots Roots to pass to the introspector.
 */
updateEnvironmentRoots(roots: { uri: string; name?: string }[]) {
  const introspector = this._environmentIntrospector;
  introspector.updateRoots(roots);

  const environmentSummary = introspector.getEnvironmentSummary();
  testDebug(`environment updated for session ${this.sessionId}: ${environmentSummary}`);

  // Recreating the context is deliberately not done here; the existing
  // context is kept and the situation is only logged.
  if (this._browserContextPromise) {
    testDebug(`browser context exists - environment changes will apply to new contexts`);
  }
}
/**
 * Returns the environment introspector held by this context (the same
 * instance, not a copy).
 */
getEnvironmentIntrospector(): EnvironmentIntrospector {
return this._environmentIntrospector;
}
/**
 * Applies browser setting changes and closes the current browser context
 * so that the next one is created with the new settings.
 *
 * NOTE: only the top level of the config is copied. The nested `browser`
 * options object is shared with the previous config and is modified in place.
 *
 * @param changes Settings to change. `headless` is applied whenever it is
 *     not `undefined`; `viewport` and `userAgent` only when truthy.
 */
async updateBrowserConfig(changes: {
  headless?: boolean;
  viewport?: { width: number; height: number };
  userAgent?: string;
}): Promise<void> {
  const { headless, viewport, userAgent } = changes;
  const updatedConfig = { ...this.config };
  const browserSettings = updatedConfig.browser;

  if (headless !== undefined)
    browserSettings.launchOptions.headless = headless;
  if (viewport)
    browserSettings.contextOptions.viewport = viewport;
  if (userAgent)
    browserSettings.contextOptions.userAgent = userAgent;

  // The cast is how the original replaces `config` from inside this method.
  (this as any).config = updatedConfig;

  // Closing forces the next use to build a context from the new settings.
  await this.closeBrowserContext();

  // Existing tabs belonged to the context that was just closed.
  this._tabs = [];
  this._currentTab = undefined;

  testDebug(`browser config updated for session ${this.sessionId}: headless=${updatedConfig.browser.launchOptions.headless}, viewport=${JSON.stringify(updatedConfig.browser.contextOptions.viewport)}`);
}
/**
 * Stops video recording and returns the paths of the recorded videos.
 *
 * Every tracked page that is still open is closed. The video path is then
 * collected for each tracked page, including pages that had already been
 * closed earlier (previously their videos were silently left out of the
 * result).
 *
 * @returns Paths of the recorded video files; empty when recording was not
 *     enabled. Pages whose close or path lookup fails are skipped and logged.
 */
async stopVideoRecording(): Promise<string[]> {
  if (!this._videoRecordingConfig)
    return [];

  const videoPaths: string[] = [];

  for (const page of this._activePagesWithVideos) {
    try {
      if (!page.isClosed())
        await page.close();
      // Look up the video regardless of who closed the page.
      const video = page.video();
      if (video)
        videoPaths.push(await video.path());
    } catch (error) {
      // Best effort: one failing page must not prevent collecting the rest.
      testDebug('Error closing page for video recording:', error);
    }
  }

  // Recording is over: forget tracked pages and the recording settings.
  this._activePagesWithVideos.clear();
  this._videoRecordingConfig = undefined;
  this._videoBaseFilename = undefined;

  return videoPaths;
}
}

View File

@ -27,12 +27,11 @@ import { spawn } from 'child_process';
import { WebSocket, WebSocketServer } from 'ws';
import debug from 'debug';
import * as playwright from 'playwright';
import { httpAddressToString, startHttpServer } from '../transport.js';
import { BrowserContextFactory } from '../browserContextFactory.js';
// @ts-ignore
const { registry } = await import('playwright-core/lib/server/registry/index');
import { httpAddressToString, startHttpServer } from '../httpServer.js';
import { logUnhandledError } from '../log.js';
import { ManualPromise } from '../manualPromise.js';
import type { BrowserContextFactory } from '../browserContextFactory.js';
import type websocket from 'ws';
const debugLogger = debug('pw:mcp:relay');
@ -67,7 +66,8 @@ export class CDPRelayServer {
sessionId: string;
} | undefined;
private _nextSessionId: number = 1;
private _extensionConnectionPromise!: ManualPromise<void>;
private _extensionConnectionPromise: Promise<void>;
private _extensionConnectionResolve: (() => void) | null = null;
constructor(server: http.Server, browserChannel: string) {
this._wsHost = httpAddressToString(server.address()).replace(/^http/, 'ws');
@ -77,7 +77,9 @@ export class CDPRelayServer {
this._cdpPath = `/cdp/${uuid}`;
this._extensionPath = `/extension/${uuid}`;
this._resetExtensionConnection();
this._extensionConnectionPromise = new Promise(resolve => {
this._extensionConnectionResolve = resolve;
});
this._wss = new WebSocketServer({ server });
this._wss.on('connection', this._onConnection.bind(this));
}
@ -165,15 +167,15 @@ export class CDPRelayServer {
private _closeExtensionConnection(reason: string) {
this._extensionConnection?.close(reason);
this._extensionConnectionPromise.reject(new Error(reason));
this._resetExtensionConnection();
}
private _resetExtensionConnection() {
this._connectedTabInfo = undefined;
this._extensionConnection = null;
this._extensionConnectionPromise = new ManualPromise();
void this._extensionConnectionPromise.catch(logUnhandledError);
this._extensionConnectionPromise = new Promise(resolve => {
this._extensionConnectionResolve = resolve;
});
}
private _closePlaywrightConnection(reason: string) {
@ -196,7 +198,7 @@ export class CDPRelayServer {
this._closePlaywrightConnection(`Extension disconnected: ${reason}`);
};
this._extensionConnection.onmessage = this._handleExtensionMessage.bind(this);
this._extensionConnectionPromise.resolve();
this._extensionConnectionResolve?.();
}
private _handleExtensionMessage(method: string, params: any) {
@ -322,10 +324,10 @@ class ExtensionContextFactory implements BrowserContextFactory {
}
}
export async function startCDPRelayServer(browserChannel: string, abortController: AbortController) {
const httpServer = await startHttpServer({});
export async function startCDPRelayServer(port: number, browserChannel: string) {
const httpServer = await startHttpServer({ port });
const cdpRelayServer = new CDPRelayServer(httpServer, browserChannel);
abortController.signal.addEventListener('abort', () => cdpRelayServer.stop());
process.on('exit', () => cdpRelayServer.stop());
debugLogger(`CDP relay server started, extension endpoint: ${cdpRelayServer.extensionEndpoint()}.`);
return new ExtensionContextFactory(cdpRelayServer);
}

View File

@ -14,14 +14,22 @@
* limitations under the License.
*/
import { startHttpServer, startHttpTransport, startStdioTransport } from '../transport.js';
import { Server } from '../server.js';
import { startCDPRelayServer } from './cdpRelay.js';
import { BrowserServerBackend } from '../browserServerBackend.js';
import * as mcpTransport from '../mcp/transport.js';
import { filteredTools } from '../tools.js';
import type { FullConfig } from '../config.js';
export async function runWithExtension(config: FullConfig, abortController: AbortController) {
const contextFactory = await startCDPRelayServer(config.browser.launchOptions.channel || 'chrome', abortController);
const serverBackendFactory = () => new BrowserServerBackend(config, contextFactory);
await mcpTransport.start(serverBackendFactory, config.server);
export async function runWithExtension(config: FullConfig) {
const contextFactory = await startCDPRelayServer(9225, config.browser.launchOptions.channel || 'chrome');
const server = new Server(config, filteredTools(config), contextFactory);
server.setupExitWatchdog();
if (config.server.port !== undefined) {
const httpServer = await startHttpServer(config.server);
startHttpTransport(httpServer, server);
} else {
await startStdioTransport(server);
}
}

View File

@ -14,31 +14,219 @@
* limitations under the License.
*/
import assert from 'assert';
import fs from 'fs';
import path from 'path';
import http from 'http';
import net from 'net';
import type * as net from 'net';
import mime from 'mime';
export async function startHttpServer(config: { host?: string, port?: number }): Promise<http.Server> {
const { host, port } = config;
const httpServer = http.createServer();
await new Promise<void>((resolve, reject) => {
httpServer.on('error', reject);
httpServer.listen(port, host, () => {
resolve();
httpServer.removeListener('error', reject);
import { ManualPromise } from './manualPromise.js';
export type ServerRouteHandler = (request: http.IncomingMessage, response: http.ServerResponse) => void;
export type Transport = {
sendEvent?: (method: string, params: any) => void;
close?: () => void;
onconnect: () => void;
dispatch: (method: string, params: any) => Promise<any>;
onclose: () => void;
};
export class HttpServer {
private _server: http.Server;
private _urlPrefixPrecise: string = '';
private _urlPrefixHumanReadable: string = '';
private _port: number = 0;
private _routes: { prefix?: string, exact?: string, handler: ServerRouteHandler }[] = [];
constructor() {
this._server = http.createServer(this._onRequest.bind(this));
decorateServer(this._server);
}
server() {
return this._server;
}
routePrefix(prefix: string, handler: ServerRouteHandler) {
this._routes.push({ prefix, handler });
}
routePath(path: string, handler: ServerRouteHandler) {
this._routes.push({ exact: path, handler });
}
port(): number {
return this._port;
}
private async _tryStart(port: number | undefined, host: string) {
const errorPromise = new ManualPromise();
const errorListener = (error: Error) => errorPromise.reject(error);
this._server.on('error', errorListener);
try {
this._server.listen(port, host);
await Promise.race([
new Promise(cb => this._server!.once('listening', cb)),
errorPromise,
]);
} finally {
this._server.removeListener('error', errorListener);
}
}
async start(options: { port?: number, preferredPort?: number, host?: string } = {}): Promise<void> {
const host = options.host || 'localhost';
if (options.preferredPort) {
try {
await this._tryStart(options.preferredPort, host);
} catch (e: any) {
if (!e || !e.message || !e.message.includes('EADDRINUSE'))
throw e;
await this._tryStart(undefined, host);
}
} else {
await this._tryStart(options.port, host);
}
const address = this._server.address();
if (typeof address === 'string') {
this._urlPrefixPrecise = address;
this._urlPrefixHumanReadable = address;
} else {
this._port = address!.port;
const resolvedHost = address!.family === 'IPv4' ? address!.address : `[${address!.address}]`;
this._urlPrefixPrecise = `http://${resolvedHost}:${address!.port}`;
this._urlPrefixHumanReadable = `http://${host}:${address!.port}`;
}
}
async stop() {
await new Promise(cb => this._server!.close(cb));
}
urlPrefix(purpose: 'human-readable' | 'precise'): string {
return purpose === 'human-readable' ? this._urlPrefixHumanReadable : this._urlPrefixPrecise;
}
serveFile(request: http.IncomingMessage, response: http.ServerResponse, absoluteFilePath: string, headers?: { [name: string]: string }): boolean {
try {
for (const [name, value] of Object.entries(headers || {}))
response.setHeader(name, value);
if (request.headers.range)
this._serveRangeFile(request, response, absoluteFilePath);
else
this._serveFile(response, absoluteFilePath);
return true;
} catch (e) {
return false;
}
}
_serveFile(response: http.ServerResponse, absoluteFilePath: string) {
const content = fs.readFileSync(absoluteFilePath);
response.statusCode = 200;
const contentType = mime.getType(path.extname(absoluteFilePath)) || 'application/octet-stream';
response.setHeader('Content-Type', contentType);
response.setHeader('Content-Length', content.byteLength);
response.end(content);
}
_serveRangeFile(request: http.IncomingMessage, response: http.ServerResponse, absoluteFilePath: string) {
const range = request.headers.range;
if (!range || !range.startsWith('bytes=') || range.includes(', ') || [...range].filter(char => char === '-').length !== 1) {
response.statusCode = 400;
return response.end('Bad request');
}
// Parse the range header: https://datatracker.ietf.org/doc/html/rfc7233#section-2.1
const [startStr, endStr] = range.replace(/bytes=/, '').split('-');
// Both start and end (when passing to fs.createReadStream) and the range header are inclusive and start counting at 0.
let start: number;
let end: number;
const size = fs.statSync(absoluteFilePath).size;
if (startStr !== '' && endStr === '') {
// No end specified: use the whole file
start = +startStr;
end = size - 1;
} else if (startStr === '' && endStr !== '') {
// No start specified: calculate start manually
start = size - +endStr;
end = size - 1;
} else {
start = +startStr;
end = +endStr;
}
// Handle unavailable range request
if (Number.isNaN(start) || Number.isNaN(end) || start >= size || end >= size || start > end) {
// Return the 416 Range Not Satisfiable: https://datatracker.ietf.org/doc/html/rfc7233#section-4.4
response.writeHead(416, {
'Content-Range': `bytes */${size}`
});
return response.end();
}
// Sending Partial Content: https://datatracker.ietf.org/doc/html/rfc7233#section-4.1
response.writeHead(206, {
'Content-Range': `bytes ${start}-${end}/${size}`,
'Accept-Ranges': 'bytes',
'Content-Length': end - start + 1,
'Content-Type': mime.getType(path.extname(absoluteFilePath))!,
});
return httpServer;
const readable = fs.createReadStream(absoluteFilePath, { start, end });
readable.pipe(response);
}
private _onRequest(request: http.IncomingMessage, response: http.ServerResponse) {
if (request.method === 'OPTIONS') {
response.writeHead(200);
response.end();
return;
}
request.on('error', () => response.end());
try {
if (!request.url) {
response.end();
return;
}
const url = new URL('http://localhost' + request.url);
for (const route of this._routes) {
if (route.exact && url.pathname === route.exact) {
route.handler(request, response);
return;
}
if (route.prefix && url.pathname.startsWith(route.prefix)) {
route.handler(request, response);
return;
}
}
response.statusCode = 404;
response.end();
} catch (e) {
response.end();
}
}
}
export function httpAddressToString(address: string | net.AddressInfo | null): string {
assert(address, 'Could not bind server socket');
if (typeof address === 'string')
return address;
const resolvedPort = address.port;
let resolvedHost = address.family === 'IPv4' ? address.address : `[${address.address}]`;
if (resolvedHost === '0.0.0.0' || resolvedHost === '[::]')
resolvedHost = 'localhost';
return `http://${resolvedHost}:${resolvedPort}`;
/**
 * Patches `server.close()` so that it first destroys every socket that is
 * still open, then delegates to the original `close`.
 *
 * Sockets are tracked from their 'connection' event until their 'close' event.
 */
function decorateServer(server: net.Server) {
  const liveSockets = new Set<net.Socket>();
  const trackSocket = (socket: net.Socket) => {
    liveSockets.add(socket);
    socket.once('close', () => {
      liveSockets.delete(socket);
    });
  };
  server.on('connection', trackSocket);

  // Keep a handle on the unpatched implementation before replacing it.
  const originalClose = server.close;
  server.close = (callback?: (err?: Error) => void) => {
    liveSockets.forEach(socket => socket.destroy());
    liveSockets.clear();
    return originalClose.call(server, callback);
  };
}

View File

@ -14,11 +14,10 @@
* limitations under the License.
*/
import { BrowserServerBackend } from './browserServerBackend.js';
import { createMCPServer } from './connection.js';
import { resolveConfig } from './config.js';
import { contextFactory } from './browserContextFactory.js';
import * as mcpServer from './mcp/server.js';
import { filteredTools } from './tools.js';
import type { Config } from '../config.js';
import type { BrowserContext } from 'playwright';
import type { BrowserContextFactory } from './browserContextFactory.js';
@ -27,7 +26,7 @@ import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
export async function createConnection(userConfig: Config = {}, contextGetter?: () => Promise<BrowserContext>): Promise<Server> {
const config = await resolveConfig(userConfig);
const factory = contextGetter ? new SimpleBrowserContextFactory(contextGetter) : contextFactory(config.browser);
return mcpServer.createServer(new BrowserServerBackend(config, factory));
return createMCPServer(config, filteredTools(config), factory);
}
class SimpleBrowserContextFactory implements BrowserContextFactory {

View File

@ -41,16 +41,15 @@ export type LLMConversation = {
};
export interface LLMDelegate {
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation;
createConversation(task: string, tools: Tool[]): LLMConversation;
makeApiCall(conversation: LLMConversation): Promise<LLMToolCall[]>;
addToolResults(conversation: LLMConversation, results: Array<{ toolCallId: string; content: string; isError?: boolean }>): void;
checkDoneToolCall(toolCall: LLMToolCall): string | null;
}
export async function runTask(delegate: LLMDelegate, client: Client, task: string, oneShot: boolean = false): Promise<LLMMessage[]> {
export async function runTask(delegate: LLMDelegate, client: Client, task: string): Promise<string> {
const { tools } = await client.listTools();
const taskContent = oneShot ? `Perform following task: ${task}.` : `Perform following task: ${task}. Once the task is complete, call the "done" tool.`;
const conversation = delegate.createConversation(taskContent, tools, oneShot);
const conversation = delegate.createConversation(task, tools);
for (let iteration = 0; iteration < 5; ++iteration) {
debug('history')('Making API call for iteration', iteration);
@ -60,9 +59,10 @@ export async function runTask(delegate: LLMDelegate, client: Client, task: strin
const toolResults: Array<{ toolCallId: string; content: string; isError?: boolean }> = [];
for (const toolCall of toolCalls) {
// Check if this is the "done" tool
const doneResult = delegate.checkDoneToolCall(toolCall);
if (doneResult !== null)
return conversation.messages;
return doneResult;
const { name, arguments: args, id } = toolCall;
try {
@ -99,9 +99,8 @@ export async function runTask(delegate: LLMDelegate, client: Client, task: strin
}
}
// Add tool results to conversation
delegate.addToolResults(conversation, toolResults);
if (oneShot)
return conversation.messages;
}
throw new Error('Failed to perform step, max attempts reached');

View File

@ -14,45 +14,38 @@
* limitations under the License.
*/
import type Anthropic from '@anthropic-ai/sdk';
import Anthropic from '@anthropic-ai/sdk';
import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
const model = 'claude-sonnet-4-20250514';
export class ClaudeDelegate implements LLMDelegate {
private _anthropic: Anthropic | undefined;
private anthropic = new Anthropic();
async anthropic(): Promise<Anthropic> {
if (!this._anthropic) {
const anthropic = await import('@anthropic-ai/sdk');
this._anthropic = new anthropic.Anthropic();
}
return this._anthropic;
}
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
createConversation(task: string, tools: Tool[]): LLMConversation {
const llmTools: LLMTool[] = tools.map(tool => ({
name: tool.name,
description: tool.description || '',
inputSchema: tool.inputSchema,
}));
if (!oneShot) {
// Add the "done" tool
llmTools.push({
name: 'done',
description: 'Call this tool when the task is complete.',
inputSchema: {
type: 'object',
properties: {},
properties: {
result: { type: 'string', description: 'The result of the task.' },
},
},
});
}
return {
messages: [{
role: 'user',
content: task
content: `Perform following task: ${task}. Once the task is complete, call the "done" tool.`
}],
tools: llmTools,
};
@ -126,8 +119,7 @@ export class ClaudeDelegate implements LLMDelegate {
input_schema: tool.inputSchema,
}));
const anthropic = await this.anthropic();
const response = await anthropic.messages.create({
const response = await this.anthropic.messages.create({
model,
max_tokens: 10000,
messages: claudeMessages,

View File

@ -14,45 +14,39 @@
* limitations under the License.
*/
import type OpenAI from 'openai';
import OpenAI from 'openai';
import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
const model = 'gpt-4.1';
export class OpenAIDelegate implements LLMDelegate {
private _openai: OpenAI | undefined;
private openai = new OpenAI();
async openai(): Promise<OpenAI> {
if (!this._openai) {
const oai = await import('openai');
this._openai = new oai.OpenAI();
}
return this._openai;
}
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
createConversation(task: string, tools: Tool[]): LLMConversation {
const genericTools: LLMTool[] = tools.map(tool => ({
name: tool.name,
description: tool.description || '',
inputSchema: tool.inputSchema,
}));
if (!oneShot) {
// Add the "done" tool
genericTools.push({
name: 'done',
description: 'Call this tool when the task is complete.',
inputSchema: {
type: 'object',
properties: {},
properties: {
result: { type: 'string', description: 'The result of the task.' },
},
required: ['result'],
},
});
}
return {
messages: [{
role: 'user',
content: task
content: `Peform following task: ${task}. Once the task is complete, call the "done" tool.`
}],
tools: genericTools,
};
@ -114,8 +108,7 @@ export class OpenAIDelegate implements LLMDelegate {
},
}));
const openai = await this.openai();
const response = await openai.chat.completions.create({
const response = await this.openai.chat.completions.create({
model,
messages: openaiMessages,
tools: openaiTools,

View File

@ -49,11 +49,10 @@ async function run(delegate: LLMDelegate) {
await client.connect(transport);
await client.ping();
for (const task of tasks) {
const messages = await runTask(delegate, client, task);
for (const message of messages)
console.log(`${message.role}: ${message.content}`);
}
let lastResult: string | undefined;
for (const task of tasks)
lastResult = await runTask(delegate, client, task);
console.log(lastResult);
await client.close();
}

84
src/loop/onetool.ts Normal file
View File

@ -0,0 +1,84 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import path from 'path';
import url from 'url';
import dotenv from 'dotenv';
import { z } from 'zod';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { FullConfig } from '../config.js';
import { defineTool } from '../tools/tool.js';
import { Server } from '../server.js';
import { startHttpServer, startHttpTransport, startStdioTransport } from '../transport.js';
import { OpenAIDelegate } from './loopOpenAI.js';
import { runTask } from './loop.js';
dotenv.config();
const __filename = url.fileURLToPath(import.meta.url);
let innerClient: Client | undefined;
const delegate = new OpenAIDelegate();
/**
 * The single MCP tool exposed by this server. It hands the natural-language
 * `task` to the LLM loop (`runTask`), which drives the inner Playwright MCP
 * client, and reports the loop's final result string as the tool result.
 */
const oneTool = defineTool({
  capability: 'core',
  schema: {
    name: 'browser',
    title: 'Perform a task with the browser',
    description: 'Perform a task with the browser. It can click, type, export, capture screenshot, drag, hover, select options, etc.',
    inputSchema: z.object({
      task: z.string().describe('The task to perform with the browser'),
    }),
    // The delegated task can click, type, drag, etc., so it mutates the page
    // and must not be advertised to clients as read-only.
    type: 'destructive',
  },
  handle: async (context, params, response) => {
    // `innerClient` is only assigned by runOneTool(); fail with a clear
    // message instead of an opaque TypeError deep inside runTask().
    if (!innerClient)
      throw new Error('Inner MCP client is not connected; runOneTool() must be called first.');
    const result = await runTask(delegate, innerClient, params.task);
    response.addResult(result);
  },
});
/**
 * Starts the one-tool MCP server.
 *
 * Connects the inner client first, then serves the single `browser` tool over
 * HTTP when `config.server.port` is set, and over stdio otherwise.
 */
export async function runOneTool(config: FullConfig) {
  innerClient = await createInnerClient();

  const mcpServer = new Server(config, [oneTool]);
  mcpServer.setupExitWatchdog();

  // No port configured: talk to the client over stdio.
  if (config.server.port === undefined) {
    await startStdioTransport(mcpServer);
    return;
  }

  const listener = await startHttpServer(config.server);
  startHttpTransport(listener, mcpServer);
}
/**
 * Spawns `cli.js` (resolved relative to this file) with `node` as a child
 * process and returns an MCP client connected to it over stdio.
 *
 * The child inherits this process's environment and stderr. The client is
 * pinged once before being returned.
 */
async function createInnerClient(): Promise<Client> {
  const cliEntryPoint = path.resolve(__filename, '../../../cli.js');
  const childTransport = new StdioClientTransport({
    command: 'node',
    args: [cliEntryPoint],
    stderr: 'inherit',
    env: process.env as Record<string, string>,
  });

  const proxyClient = new Client({ name: 'Playwright Proxy', version: '1.0.0' });
  await proxyClient.connect(childTransport);
  await proxyClient.ping();
  return proxyClient;
}

View File

@ -1,77 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { contextFactory } from '../browserContextFactory.js';
import { BrowserServerBackend } from '../browserServerBackend.js';
import { Context as BrowserContext } from '../context.js';
import { runTask } from '../loop/loop.js';
import { OpenAIDelegate } from '../loop/loopOpenAI.js';
import { ClaudeDelegate } from '../loop/loopClaude.js';
import { InProcessTransport } from '../mcp/inProcessTransport.js';
import * as mcpServer from '../mcp/server.js';
import type { LLMDelegate } from '../loop/loop.js';
import type { FullConfig } from '../config.js';
/**
 * Context shared by the loop tools: an MCP client connected to an in-process
 * browser server, plus the LLM delegate that drives it.
 */
export class Context {
  readonly config: FullConfig;
  private _client: Client;
  private _delegate: LLMDelegate;

  /**
   * @throws Error when neither OPENAI_API_KEY nor ANTHROPIC_API_KEY is set.
   */
  constructor(config: FullConfig, client: Client) {
    this.config = config;
    this._client = client;
    this._delegate = Context._createDelegate();
  }

  /** Picks the LLM backend from the environment; OpenAI wins when both keys are present. */
  private static _createDelegate(): LLMDelegate {
    const { OPENAI_API_KEY, ANTHROPIC_API_KEY } = process.env;
    if (OPENAI_API_KEY)
      return new OpenAIDelegate();
    if (ANTHROPIC_API_KEY)
      return new ClaudeDelegate();
    throw new Error('No LLM API key found. Please set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable.');
  }

  /**
   * Creates a browser-backed MCP server in this process, connects a client to
   * it through an InProcessTransport, pings it, and wraps it in a Context.
   */
  static async create(config: FullConfig) {
    const client = new Client({ name: 'Playwright Proxy', version: '1.0.0' });
    const backend = new BrowserServerBackend(config, contextFactory(config.browser));
    await client.connect(new InProcessTransport(mcpServer.createServer(backend)));
    await client.ping();
    return new Context(config, client);
  }

  /**
   * Runs `task` through the LLM loop and renders the resulting conversation
   * as a single text tool response.
   *
   * @param task natural-language task description.
   * @param oneShot forwarded to the loop; when true, message content is kept
   *   in full instead of being cut at the page-state marker.
   */
  async runTask(task: string, oneShot: boolean = false): Promise<mcpServer.ToolResponse> {
    // The first message is the user's own task, so it is not echoed back.
    const [, ...replies] = await runTask(this._delegate, this._client, task, oneShot);
    const lines: string[] = [];
    for (const { role, content } of replies) {
      // Whitespace-only messages carry nothing worth reporting.
      if (content.trim() === '')
        continue;
      // Unless one-shot, drop the page snapshot that follows the marker.
      const markerAt = oneShot ? -1 : content.indexOf('### Page state');
      lines.push(`[${role}]:`, markerAt === -1 ? content : content.substring(0, markerAt));
    }
    return {
      content: [{ type: 'text', text: lines.join('\n') }],
    };
  }

  /** Releases browser resources by calling `BrowserContext.disposeAll()`. */
  async close() {
    await BrowserContext.disposeAll();
  }
}

View File

@ -1,63 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import dotenv from 'dotenv';
import * as mcpServer from '../mcp/server.js';
import * as mcpTransport from '../mcp/transport.js';
import { packageJSON } from '../package.js';
import { Context } from './context.js';
import { perform } from './perform.js';
import { snapshot } from './snapshot.js';
import type { FullConfig } from '../config.js';
import type { ServerBackend } from '../mcp/server.js';
import type { Tool } from './tool.js';
/**
 * Entry point for loop-tools mode: loads `.env` via dotenv, then starts the
 * MCP transport described by `config.server`, creating a fresh
 * LoopToolsServerBackend each time the transport asks for a backend.
 */
export async function runLoopTools(config: FullConfig) {
  dotenv.config();
  await mcpTransport.start(() => new LoopToolsServerBackend(config), config.server);
}
/**
 * Server backend exposing the two loop tools (`perform` and `snapshot`).
 * The Context the tools run against is created in `initialize()`.
 */
class LoopToolsServerBackend implements ServerBackend {
  readonly name = 'Playwright';
  readonly version = packageJSON.version;
  private _config: FullConfig;
  private _context: Context | undefined;
  private _tools: Tool<any>[] = [perform, snapshot];

  constructor(config: FullConfig) {
    this._config = config;
  }

  /** Creates the Context used by subsequent tool calls. */
  async initialize() {
    this._context = await Context.create(this._config);
  }

  /** Returns the schemas of the tools this backend serves. */
  tools(): mcpServer.ToolSchema<any>[] {
    return Array.from(this._tools, ({ schema }) => schema);
  }

  /**
   * Dispatches to the tool whose schema name equals `schema.name`.
   * Assumes `schema` is one of the schemas returned by `tools()` and that
   * `initialize()` has already completed.
   */
  async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any): Promise<mcpServer.ToolResponse> {
    const requested = schema.name;
    const match = this._tools.find(candidate => candidate.schema.name === requested)!;
    return await match.handle(this._context!, parsedArguments);
  }

  /** Closes the context without awaiting the result. */
  serverClosed() {
    void this._context!.close();
  }
}

View File

@ -1,36 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
/**
 * `browser_perform` tool: forwards a natural-language task to
 * `context.runTask()` and returns its response. Marked destructive.
 */
export const perform = defineTool({
  schema: {
    name: 'browser_perform',
    title: 'Perform a task with the browser',
    description: 'Perform a task with the browser. It can click, type, export, capture screenshot, drag, hover, select options, etc.',
    inputSchema: z.object({
      task: z.string().describe('The task to perform with the browser'),
    }),
    type: 'destructive',
  },
  handle: async (context, params) => context.runTask(params.task),
});

View File

@ -1,32 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
/**
 * `browser_snapshot` tool: takes no input and runs the fixed task
 * 'Capture browser snapshot' through `context.runTask()` in one-shot mode.
 */
export const snapshot = defineTool({
  schema: {
    name: 'browser_snapshot',
    title: 'Take a snapshot of the browser',
    description: 'Take a snapshot of the browser to read what is on the page.',
    inputSchema: z.object({}),
    type: 'readOnly',
  },
  // Second argument `true` selects one-shot mode.
  handle: async (context, params) => context.runTask('Capture browser snapshot', true),
});

View File

@ -1,29 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { z } from 'zod';
import type * as mcpServer from '../mcp/server.js';
import type { Context } from './context.js';
/**
 * A loop tool: an MCP tool schema paired with the handler that implements it.
 *
 * `Input` is the zod schema of the tool's arguments; the handler receives the
 * schema's parsed output type.
 */
export type Tool<Input extends z.Schema = z.Schema> = {
// Name, title, description, input schema and read-only/destructive kind.
schema: mcpServer.ToolSchema<Input>;
// Runs the tool against the loop Context and resolves to the tool response.
handle: (context: Context, params: z.output<Input>) => Promise<mcpServer.ToolResponse>;
};
/**
 * Identity helper: returns `tool` unchanged. It exists so that `Input` is
 * inferred from `tool.schema.inputSchema`, which types `params` in `handle`.
 */
export function defineTool<Input extends z.Schema>(tool: Tool<Input>): Tool<Input> {
return tool;
}

View File

@ -1 +0,0 @@
- Generic MCP utils, no dependencies on Playwright here.

View File

@ -1,92 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
import type { Transport, TransportSendOptions } from '@modelcontextprotocol/sdk/shared/transport.js';
import type { JSONRPCMessage, MessageExtraInfo } from '@modelcontextprotocol/sdk/types.js';
/**
 * Client-side half of an in-process MCP transport pair.
 *
 * `start()` connects the given server to the paired server-side transport;
 * after that, messages are handed over by direct method calls.
 */
export class InProcessTransport implements Transport {
  private _server: Server;
  private _serverTransport: InProcessServerTransport;
  private _connected: boolean = false;

  constructor(server: Server) {
    this._server = server;
    this._serverTransport = new InProcessServerTransport(this);
  }

  /**
   * Connects the server to the server-side transport.
   * @throws Error if the transport has already been started.
   */
  async start(): Promise<void> {
    if (this._connected)
      throw new Error('InprocessTransport already started!');
    const serverSide = this._serverTransport;
    await this._server.connect(serverSide);
    // Only flagged as connected once the server has accepted the transport.
    this._connected = true;
  }

  /**
   * Delivers `message` to the server side.
   * @throws Error if the transport is not connected.
   */
  async send(message: JSONRPCMessage, options?: TransportSendOptions): Promise<void> {
    if (this._connected) {
      this._serverTransport._receiveFromClient(message);
      return;
    }
    throw new Error('Transport not connected');
  }

  /** Marks the transport closed and fires both sides' `onclose`; no-op when not connected. */
  async close(): Promise<void> {
    if (!this._connected)
      return;
    this._connected = false;
    this.onclose?.();
    this._serverTransport.onclose?.();
  }

  onclose?: (() => void) | undefined;
  onerror?: ((error: Error) => void) | undefined;
  onmessage?: ((message: JSONRPCMessage, extra?: MessageExtraInfo) => void) | undefined;
  sessionId?: string | undefined;
  setProtocolVersion?: ((version: string) => void) | undefined;

  /** Called by the server-side transport to deliver a message to this client. */
  _receiveFromServer(message: JSONRPCMessage, extra?: MessageExtraInfo): void {
    const listener = this.onmessage;
    listener?.call(this, message, extra);
  }
}
/**
 * Server-side half of the in-process transport pair. Outgoing messages are
 * passed straight to the client transport; incoming ones go to `onmessage`.
 */
class InProcessServerTransport implements Transport {
  private _clientTransport: InProcessTransport;

  constructor(clientTransport: InProcessTransport) {
    this._clientTransport = clientTransport;
  }

  /** Nothing to set up: the client transport drives the connection. */
  async start(): Promise<void> {
    return;
  }

  /** Hands `message` to the client side; `options` is not used. */
  async send(message: JSONRPCMessage, options?: TransportSendOptions): Promise<void> {
    const client = this._clientTransport;
    client._receiveFromServer(message);
  }

  /** Fires this transport's own `onclose` callback, if one is set. */
  async close(): Promise<void> {
    if (this.onclose)
      this.onclose();
  }

  onclose?: (() => void) | undefined;
  onerror?: ((error: Error) => void) | undefined;
  onmessage?: ((message: JSONRPCMessage, extra?: MessageExtraInfo) => void) | undefined;
  sessionId?: string | undefined;
  setProtocolVersion?: ((version: string) => void) | undefined;

  /** Called by the client transport to deliver a message to the server. */
  _receiveFromClient(message: JSONRPCMessage): void {
    if (this.onmessage)
      this.onmessage(message);
  }
}

View File

@ -1,105 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { ImageContent, Implementation, TextContent } from '@modelcontextprotocol/sdk/types.js';
import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
/** Name/version descriptor of the connected MCP client (the SDK's `Implementation`). */
export type ClientVersion = Implementation;
/** Result of a tool call: text and/or image content, optionally flagged as an error. */
export type ToolResponse = {
content: (TextContent | ImageContent)[];
isError?: boolean;
};
/**
 * Declarative description of a tool, generic over the zod schema of its input.
 */
export type ToolSchema<Input extends z.Schema> = {
name: string;
// Reported to clients as the `title` annotation.
title: string;
description: string;
// Converted to JSON Schema for tool listing and used to parse call arguments.
inputSchema: Input;
// Drives the `readOnlyHint` / `destructiveHint` annotations.
type: 'readOnly' | 'destructive';
};
/** Signature of a function that handles a tool call given the tool name and raw params. */
export type ToolHandler = (toolName: string, params: any) => Promise<ToolResponse>;
/**
 * Implementation behind an MCP server created by `createServer()`.
 */
export interface ServerBackend {
// Reported as the MCP server's name and version.
name: string;
version: string;
// Optional async setup; `connect()` awaits it before creating the server.
initialize?(): Promise<void>;
// Tool schemas to expose; read once when the server is created.
tools(): ToolSchema<any>[];
// Executes a tool; `parsedArguments` has already been parsed by the tool's input schema.
callTool(schema: ToolSchema<any>, parsedArguments: any): Promise<ToolResponse>;
// Optional hook run from the server's `oninitialized` with the client's version.
serverInitialized?(version: ClientVersion | undefined): void;
// Optional hook run from the server's `onclose`.
serverClosed?(): void;
}
/** Produces a new backend instance. */
export type ServerBackendFactory = () => ServerBackend;
/**
 * Builds a backend from the factory, runs its optional `initialize()` hook,
 * wraps it in an MCP server and connects that server to `transport`.
 */
export async function connect(serverBackendFactory: ServerBackendFactory, transport: Transport) {
  const backend = serverBackendFactory();
  await backend.initialize?.();
  await createServer(backend).connect(transport);
}
/**
 * Creates an MCP `Server` that lists and calls the tools of `backend`.
 *
 * - The tool list is taken from `backend.tools()` once, at creation time.
 * - A call to an unknown tool, invalid arguments, or an error thrown by the
 *   backend is returned as an `isError` text result rather than rejected.
 * - `backend.serverInitialized` / `backend.serverClosed`, when present, are
 *   wired to the server's `oninitialized` / `onclose` callbacks.
 */
export function createServer(backend: ServerBackend): Server {
  const server = new Server(
      { name: backend.name, version: backend.version },
      { capabilities: { tools: {} } },
  );
  const tools = backend.tools();

  // Shape of one entry in the tools/list response.
  const describeTool = (tool: ToolSchema<any>) => ({
    name: tool.name,
    description: tool.description,
    inputSchema: zodToJsonSchema(tool.inputSchema),
    annotations: {
      title: tool.title,
      readOnlyHint: tool.type === 'readOnly',
      destructiveHint: tool.type === 'destructive',
      openWorldHint: true,
    },
  });

  // Error results are plain text, one message per line.
  const failure = (...messages: string[]) => ({
    content: [{ type: 'text', text: messages.join('\n') }],
    isError: true,
  });

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    return { tools: tools.map(tool => describeTool(tool)) };
  });

  server.setRequestHandler(CallToolRequestSchema, async request => {
    const requestedName = request.params.name;
    const tool = tools.find(candidate => candidate.name === requestedName) as ToolSchema<any>;
    if (!tool)
      return failure(`Tool "${requestedName}" not found`);

    try {
      // Parsing inside the try block turns argument errors into error results as well.
      const parsedArguments = tool.inputSchema.parse(request.params.arguments || {});
      return await backend.callTool(tool, parsedArguments);
    } catch (error) {
      return failure(String(error));
    }
  });

  if (backend.serverInitialized)
    server.oninitialized = () => backend.serverInitialized!(server.getClientVersion());
  if (backend.serverClosed)
    server.onclose = () => backend.serverClosed!();
  return server;
}

View File

@ -18,14 +18,12 @@ import { program, Option } from 'commander';
// @ts-ignore
import { startTraceViewerServer } from 'playwright-core/lib/server';
import * as mcpTransport from './mcp/transport.js';
import { startHttpServer, startHttpTransport, startStdioTransport } from './transport.js';
import { commaSeparatedList, resolveCLIConfig, semicolonSeparatedList } from './config.js';
import { Server } from './server.js';
import { packageJSON } from './package.js';
import { runWithExtension } from './extension/main.js';
import { BrowserServerBackend } from './browserServerBackend.js';
import { Context } from './context.js';
import { contextFactory } from './browserContextFactory.js';
import { runLoopTools } from './loopTools/main.js';
import { filteredTools } from './tools.js';
program
.version('Version ' + packageJSON.version)
@ -56,30 +54,29 @@ program
.option('--user-data-dir <path>', 'path to the user data directory. If not specified, a temporary directory will be created.')
.option('--viewport-size <size>', 'specify browser viewport size in pixels, for example "1280, 720"')
.addOption(new Option('--extension', 'Connect to a running browser instance (Edge/Chrome only). Requires the "Playwright MCP Bridge" browser extension to be installed.').hideHelp())
.addOption(new Option('--loop-tools', 'Run loop tools').hideHelp())
.addOption(new Option('--vision', 'Legacy option, use --caps=vision instead').hideHelp())
.action(async options => {
if (options.vision) {
// eslint-disable-next-line no-console
console.error('The --vision option is deprecated, use --caps=vision instead');
options.caps = 'vision';
}
const config = await resolveCLIConfig(options);
const abortController = setupExitWatchdog(config.server);
if (options.extension) {
await runWithExtension(config, abortController);
return;
}
if (options.loopTools) {
await runLoopTools(config);
await runWithExtension(config);
return;
}
const browserContextFactory = contextFactory(config.browser);
const serverBackendFactory = () => new BrowserServerBackend(config, browserContextFactory);
await mcpTransport.start(serverBackendFactory, config.server);
const server = new Server(config, filteredTools(config));
server.setupExitWatchdog();
if (config.server.port !== undefined) {
const httpServer = await startHttpServer(config.server);
startHttpTransport(httpServer, server);
} else {
await startStdioTransport(server);
}
if (config.saveTrace) {
const server = await startTraceViewerServer();
@ -90,27 +87,4 @@ program
}
});
/**
 * Installs process-level shutdown hooks and returns the AbortController that
 * is aborted as soon as shutdown begins.
 *
 * Shutdown runs at most once: it arms a 15 second fallback exit, aborts the
 * controller, disposes all contexts and then exits with code 0.
 *
 * @param serverConfig Server options; only `port` is consulted here.
 * @returns Controller whose signal is aborted with the reason 'Process exiting'.
 */
function setupExitWatchdog(serverConfig: { host?: string; port?: number }) {
  const abortController = new AbortController();
  let shutdownStarted = false;

  async function shutDown() {
    if (shutdownStarted)
      return;
    shutdownStarted = true;
    // Fallback: exit anyway if disposal has not finished within 15 seconds.
    setTimeout(() => process.exit(0), 15000);
    abortController.abort('Process exiting');
    await Context.disposeAll();
    process.exit(0);
  }

  // NOTE(review): stdin 'close' is only observed when an HTTP port is
  // configured, so in stdio mode a closed stdin does not trigger shutdown.
  // Confirm that this condition is not inverted.
  if (serverConfig.port !== undefined)
    process.stdin.on('close', shutDown);

  for (const signal of ['SIGINT', 'SIGTERM'] as const)
    process.on(signal, shutDown);

  return abortController;
}
void program.parseAsync(process.argv);

59
src/server.ts Normal file
View File

@ -0,0 +1,59 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { createMCPServer } from './connection.js';
import { Context } from './context.js';
import { contextFactory as defaultContextFactory } from './browserContextFactory.js';
import type { FullConfig } from './config.js';
import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type { Tool } from './tools/tool.js';
/**
 * Bundles the resolved configuration, the tool list and a browser context
 * factory, and creates MCP connections from them.
 */
export class Server {
  readonly config: FullConfig;
  private _browserConfig: FullConfig['browser'];
  private _contextFactory: BrowserContextFactory;
  readonly tools: Tool<any>[];

  /**
   * @param config Full configuration; its `browser` section feeds the default context factory.
   * @param tools Tools handed to every connection created by this server.
   * @param contextFactory Optional factory; when omitted, one is derived from `config.browser`.
   */
  constructor(config: FullConfig, tools: Tool<any>[], contextFactory?: BrowserContextFactory) {
    this.config = config;
    this.tools = tools;
    this._browserConfig = config.browser;
    // The default factory is only built when the caller did not supply one.
    this._contextFactory = contextFactory ?? defaultContextFactory(this._browserConfig);
  }

  /**
   * Creates a fresh MCP server for this configuration and connects it to `transport`.
   */
  async createConnection(transport: Transport): Promise<void> {
    const mcpServer = await createMCPServer(this.config, this.tools, this._contextFactory);
    await mcpServer.connect(transport);
  }

  /**
   * Exits the process once stdin closes or SIGINT/SIGTERM arrives. Shutdown
   * runs at most once: it arms a 15 second fallback exit, disposes all
   * contexts and then exits with code 0.
   */
  setupExitWatchdog() {
    let shutdownStarted = false;

    async function shutDown() {
      if (shutdownStarted)
        return;
      shutdownStarted = true;
      // Fallback: exit anyway if disposal has not finished within 15 seconds.
      setTimeout(() => process.exit(0), 15000);
      await Context.disposeAll();
      process.exit(0);
    }

    process.stdin.on('close', shutDown);
    for (const signal of ['SIGINT', 'SIGTERM'] as const)
      process.on(signal, shutDown);
  }
}

View File

@ -15,7 +15,6 @@
*/
import common from './tools/common.js';
import configure from './tools/configure.js';
import console from './tools/console.js';
import dialogs from './tools/dialogs.js';
import evaluate from './tools/evaluate.js';
@ -28,7 +27,6 @@ import pdf from './tools/pdf.js';
import snapshot from './tools/snapshot.js';
import tabs from './tools/tabs.js';
import screenshot from './tools/screenshot.js';
import video from './tools/video.js';
import wait from './tools/wait.js';
import mouse from './tools/mouse.js';
@ -37,7 +35,6 @@ import type { FullConfig } from './config.js';
export const allTools: Tool<any>[] = [
...common,
...configure,
...console,
...dialogs,
...evaluate,
@ -51,7 +48,6 @@ export const allTools: Tool<any>[] = [
...screenshot,
...snapshot,
...tabs,
...video,
...wait,
];

View File

@ -1,87 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import type { Context } from '../context.js';
import type { Response } from '../response.js';
// Explicit viewport dimensions; both values are required once a viewport is given.
const viewportSchema = z.object({
  width: z.number(),
  height: z.number(),
});

// Input of the browser_configure tool. Every setting is optional, so a call
// may change any subset of them.
const configureSchema = z.object({
  headless: z.boolean().optional().describe('Whether to run the browser in headless mode'),
  viewport: viewportSchema.optional().describe('Browser viewport size'),
  userAgent: z.string().optional().describe('User agent string for the browser'),
});
export default [
  /**
   * browser_configure: compares the requested headless / viewport / user agent
   * settings with the current configuration, and when at least one differs,
   * applies them through `context.updateBrowserConfig` and reports what changed.
   * When nothing differs it reports that and leaves the browser untouched.
   */
  defineTool({
    capability: 'core',
    schema: {
      name: 'browser_configure',
      title: 'Configure browser settings',
      description: 'Change browser configuration settings like headless/headed mode, viewport size, or user agent for subsequent operations. This will close the current browser and restart it with new settings.',
      inputSchema: configureSchema,
      type: 'destructive',
    },
    handle: async (context: Context, params: z.output<typeof configureSchema>, response: Response) => {
      try {
        const currentConfig = context.config;
        // Human-readable "setting: old -> new" lines, one per setting that differs.
        const changes: string[] = [];

        if (params.headless !== undefined) {
          const currentHeadless = currentConfig.browser.launchOptions.headless;
          if (params.headless !== currentHeadless)
            changes.push(`headless: ${currentHeadless} -> ${params.headless}`);
        }

        if (params.viewport) {
          const currentViewport = currentConfig.browser.contextOptions.viewport;
          if (!currentViewport || currentViewport.width !== params.viewport.width || currentViewport.height !== params.viewport.height)
            changes.push(`viewport: ${currentViewport?.width || 'default'}x${currentViewport?.height || 'default'} -> ${params.viewport.width}x${params.viewport.height}`);
        }

        if (params.userAgent) {
          const currentUA = currentConfig.browser.contextOptions.userAgent;
          if (params.userAgent !== currentUA)
            changes.push(`userAgent: ${currentUA || 'default'} -> ${params.userAgent}`);
        }

        if (changes.length === 0) {
          response.addResult('No configuration changes detected. Current settings remain the same.');
          return;
        }

        // All requested values are forwarded, including the ones that did not change.
        await context.updateBrowserConfig({
          headless: params.headless,
          viewport: params.viewport,
          userAgent: params.userAgent,
        });

        response.addResult(`Browser configuration updated successfully:\n${changes.join('\n')}\n\nThe browser has been restarted with the new settings.`);
      } catch (error) {
        // Re-thrown with context so the caller sees which operation failed.
        throw new Error(`Failed to update browser configuration: ${error}`);
      }
    },
  }),
];

View File

@ -20,7 +20,16 @@ import type * as playwright from 'playwright';
import type { ToolCapability } from '../../config.js';
import type { Tab } from '../tab.js';
import type { Response } from '../response.js';
import type { ToolSchema } from '../mcp/server.js';
export type ToolSchema<Input extends InputType> = {
name: string;
title: string;
description: string;
inputSchema: Input;
type: 'readOnly' | 'destructive';
};
type InputType = z.Schema;
export type FileUploadModalState = {
type: 'fileChooser';
@ -36,24 +45,44 @@ export type DialogModalState = {
export type ModalState = FileUploadModalState | DialogModalState;
export type Tool<Input extends z.Schema = z.Schema> = {
export type SnapshotContent = {
type: 'snapshot';
snapshot: string;
};
export type TextContent = {
type: 'text';
text: string;
};
export type ImageContent = {
type: 'image';
image: string;
};
export type CodeContent = {
type: 'code';
code: string[];
};
export type Tool<Input extends InputType = InputType> = {
capability: ToolCapability;
schema: ToolSchema<Input>;
handle: (context: Context, params: z.output<Input>, response: Response) => Promise<void>;
};
export function defineTool<Input extends z.Schema>(tool: Tool<Input>): Tool<Input> {
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
return tool;
}
export type TabTool<Input extends z.Schema = z.Schema> = {
export type TabTool<Input extends InputType = InputType> = {
capability: ToolCapability;
schema: ToolSchema<Input>;
clearsModalState?: ModalState['type'];
handle: (tab: Tab, params: z.output<Input>, response: Response) => Promise<void>;
};
export function defineTabTool<Input extends z.Schema>(tool: TabTool<Input>): Tool<Input> {
export function defineTabTool<Input extends InputType>(tool: TabTool<Input>): Tool<Input> {
return {
...tool,
handle: async (context, params, response) => {

View File

@ -1,143 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import path from 'path';
import { z } from 'zod';
import { defineTool } from './tool.js';
/**
 * browser_start_recording: stores video recording options on the context via
 * `context.setVideoRecording` and reports where videos will be written,
 * together with an equivalent Playwright snippet.
 */
const startRecording = defineTool({
  capability: 'core',

  schema: {
    name: 'browser_start_recording',
    title: 'Start video recording',
    description: 'Start recording browser session video. This must be called BEFORE performing browser actions you want to record. New browser contexts will be created with video recording enabled. Videos are automatically saved when pages/contexts close.',
    inputSchema: z.object({
      size: z.object({
        width: z.number().optional().describe('Video width in pixels (default: scales to fit 800x800)'),
        height: z.number().optional().describe('Video height in pixels (default: scales to fit 800x800)'),
      }).optional().describe('Video recording size'),
      filename: z.string().optional().describe('Base filename for video files (default: session-{timestamp}.webm)'),
    }),
    type: 'destructive',
  },

  handle: async (context, params, response) => {
    // ISO timestamp with ':' and '.' replaced by '-' for use in a file name.
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const baseFilename = params.filename || `session-${timestamp}`;
    const videoDir = path.join(context.config.outputDir, 'videos');

    // `size` is only included when the caller provided one.
    // NOTE(review): typed as `any` in the original; the parameter type that
    // setVideoRecording expects is not visible here.
    const recordVideoOptions: any = params.size
      ? { dir: videoDir, size: params.size }
      : { dir: videoDir };

    // Stored on the context so that it applies to future browser contexts.
    context.setVideoRecording(recordVideoOptions, baseFilename);

    const resultLines = [
      `✓ Video recording enabled. Videos will be saved to: ${videoDir}`,
      `✓ Video files will be named: ${baseFilename}-*.webm`,
      `\nNext steps:`,
      `1. Navigate to pages and perform browser actions`,
      `2. Use browser_stop_recording when finished to save videos`,
      `3. Videos are automatically saved when pages close`,
    ];
    for (const line of resultLines)
      response.addResult(line);

    const codeLines = [
      `// Video recording enabled for new browser contexts`,
      `const context = await browser.newContext({`,
      ` recordVideo: {`,
      ` dir: '${videoDir}',`,
      ...(params.size ? [` size: { width: ${params.size.width || 'auto'}, height: ${params.size.height || 'auto'} }`] : []),
      ` }`,
      `});`,
    ];
    for (const line of codeLines)
      response.addCode(line);
  },
});
/**
 * browser_stop_recording: stops video recording through
 * `context.stopVideoRecording()` and lists the resulting video files.
 *
 * The tool is marked 'destructive' because, per its own description, stopping
 * closes all active pages; a 'readOnly' type would advertise it to clients as
 * having no side effects.
 */
const stopRecording = defineTool({
  capability: 'core',

  schema: {
    name: 'browser_stop_recording',
    title: 'Stop video recording',
    description: 'Stop video recording and return the paths to recorded video files. This closes all active pages to ensure videos are properly saved. Call this when you want to finalize and access the recorded videos.',
    inputSchema: z.object({}),
    type: 'destructive',
  },

  handle: async (context, params, response) => {
    const videoPaths = await context.stopVideoRecording();

    // An empty list is reported as "nothing was being recorded".
    if (videoPaths.length === 0) {
      response.addResult('No video recording was active.');
      return;
    }

    response.addResult(`✓ Video recording stopped. ${videoPaths.length} video file(s) saved:`);
    for (const videoPath of videoPaths)
      response.addResult(`📹 ${videoPath}`);
    response.addResult(`\nVideos are now ready for viewing or sharing.`);

    response.addCode(`// Video recording stopped`);
    response.addCode(`await context.close(); // Ensures video is saved`);
  },
});
/**
 * browser_recording_status: reports whether video recording is enabled and,
 * if so, the output directory, base filename, video size and the number of
 * active recordings, all taken from `context.getVideoRecordingInfo()`.
 */
const getRecordingStatus = defineTool({
  capability: 'core',

  schema: {
    name: 'browser_recording_status',
    title: 'Get video recording status',
    description: 'Check if video recording is currently enabled and get recording details. Use this to verify recording is active before performing actions, or to check output directory and settings.',
    inputSchema: z.object({}),
    type: 'readOnly',
  },

  handle: async (context, params, response) => {
    const info = context.getVideoRecordingInfo();

    if (!info.enabled) {
      const disabledLines = [
        '❌ Video recording is not enabled.',
        '\n💡 To start recording:',
        '1. Use browser_start_recording to enable recording',
        '2. Navigate to pages and perform actions',
        '3. Use browser_stop_recording to save videos',
      ];
      for (const line of disabledLines)
        response.addResult(line);
      return;
    }

    response.addResult(`✅ Video recording is active:`);
    response.addResult(`📁 Output directory: ${info.config?.dir}`);
    response.addResult(`📝 Base filename: ${info.baseFilename}`);

    // Without an explicit size the default scaling is reported instead.
    const size = info.config?.size;
    response.addResult(size
      ? `📐 Video size: ${size.width}x${size.height}`
      : `📐 Video size: auto-scaled to fit 800x800`);

    response.addResult(`🎬 Active recordings: ${info.activeRecordings}`);
    if (info.activeRecordings === 0)
      response.addResult(`\n💡 Tip: Navigate to pages to start recording browser actions`);
  },
});
// Video recording tools exported by this module: start, stop and status.
export default [
startRecording,
stopRecording,
getRecordingStatus,
];

View File

@ -14,34 +14,25 @@
* limitations under the License.
*/
import http from 'http';
import crypto from 'crypto';
import debug from 'debug';
import http from 'node:http';
import assert from 'node:assert';
import crypto from 'node:crypto';
import debug from 'debug';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { httpAddressToString, startHttpServer } from '../httpServer.js';
import * as mcpServer from './server.js';
import type { ServerBackendFactory } from './server.js';
import type { AddressInfo } from 'node:net';
import type { Server } from './server.js';
/**
 * Starts serving MCP connections for backends produced by the factory.
 *
 * Without a port the stdio transport is used. With a port an HTTP server is
 * started from `options` and the HTTP transport is attached to it.
 *
 * @param serverBackendFactory Factory handed to whichever transport is started.
 * @param options Host/port of the HTTP server; `port` selects the transport.
 */
export async function start(serverBackendFactory: ServerBackendFactory, options: { host?: string; port?: number }) {
  if (options.port === undefined) {
    await startStdioTransport(serverBackendFactory);
    return;
  }

  const httpServer = await startHttpServer(options);
  startHttpTransport(httpServer, serverBackendFactory);
}
async function startStdioTransport(serverBackendFactory: ServerBackendFactory) {
await mcpServer.connect(serverBackendFactory, new StdioServerTransport());
export async function startStdioTransport(server: Server) {
await server.createConnection(new StdioServerTransport());
}
const testDebug = debug('pw:mcp:test');
async function handleSSE(serverBackendFactory: ServerBackendFactory, req: http.IncomingMessage, res: http.ServerResponse, url: URL, sessions: Map<string, SSEServerTransport>) {
async function handleSSE(server: Server, req: http.IncomingMessage, res: http.ServerResponse, url: URL, sessions: Map<string, SSEServerTransport>) {
if (req.method === 'POST') {
const sessionId = url.searchParams.get('sessionId');
if (!sessionId) {
@ -60,7 +51,7 @@ async function handleSSE(serverBackendFactory: ServerBackendFactory, req: http.I
const transport = new SSEServerTransport('/sse', res);
sessions.set(transport.sessionId, transport);
testDebug(`create SSE session: ${transport.sessionId}`);
await mcpServer.connect(serverBackendFactory, transport);
await server.createConnection(transport);
res.on('close', () => {
testDebug(`delete SSE session: ${transport.sessionId}`);
sessions.delete(transport.sessionId);
@ -72,7 +63,7 @@ async function handleSSE(serverBackendFactory: ServerBackendFactory, req: http.I
res.end('Method not allowed');
}
async function handleStreamable(serverBackendFactory: ServerBackendFactory, req: http.IncomingMessage, res: http.ServerResponse, sessions: Map<string, StreamableHTTPServerTransport>) {
async function handleStreamable(server: Server, req: http.IncomingMessage, res: http.ServerResponse, sessions: Map<string, StreamableHTTPServerTransport>) {
const sessionId = req.headers['mcp-session-id'] as string | undefined;
if (sessionId) {
const transport = sessions.get(sessionId);
@ -89,7 +80,7 @@ async function handleStreamable(serverBackendFactory: ServerBackendFactory, req:
sessionIdGenerator: () => crypto.randomUUID(),
onsessioninitialized: async sessionId => {
testDebug(`create http session: ${transport.sessionId}`);
await mcpServer.connect(serverBackendFactory, transport);
await server.createConnection(transport);
sessions.set(sessionId, transport);
}
});
@ -109,15 +100,28 @@ async function handleStreamable(serverBackendFactory: ServerBackendFactory, req:
res.end('Invalid request');
}
function startHttpTransport(httpServer: http.Server, serverBackendFactory: ServerBackendFactory) {
/**
 * Creates an HTTP server and resolves once it is listening.
 *
 * @param config Host and port passed to `listen`.
 * @returns The listening server.
 * @throws Rejects with the server's 'error' event if one is emitted before it starts listening.
 */
export async function startHttpServer(config: { host?: string, port?: number }): Promise<http.Server> {
  const httpServer = http.createServer();

  await new Promise<void>((resolve, reject) => {
    const onListening = () => {
      resolve();
      // The startup error handler is only needed until the socket is bound.
      httpServer.removeListener('error', reject);
    };
    httpServer.on('error', reject);
    httpServer.listen(config.port, config.host, onListening);
  });

  return httpServer;
}
export function startHttpTransport(httpServer: http.Server, mcpServer: Server) {
const sseSessions = new Map();
const streamableSessions = new Map();
httpServer.on('request', async (req, res) => {
const url = new URL(`http://localhost${req.url}`);
if (url.pathname.startsWith('/sse'))
await handleSSE(serverBackendFactory, req, res, url, sseSessions);
await handleSSE(mcpServer, req, res, url, sseSessions);
else
await handleStreamable(serverBackendFactory, req, res, streamableSessions);
await handleStreamable(mcpServer, req, res, streamableSessions);
});
const url = httpAddressToString(httpServer.address());
const message = [
@ -135,3 +139,14 @@ function startHttpTransport(httpServer: http.Server, serverBackendFactory: Serve
// eslint-disable-next-line no-console
console.error(message);
}
/**
 * Formats the result of `server.address()` as a URL string.
 *
 * String addresses are returned unchanged. Non-IPv4 hosts are wrapped in
 * square brackets, and the wildcard hosts `0.0.0.0` / `[::]` are shown as
 * `localhost`.
 *
 * @param address Value returned by `server.address()`.
 * @returns The string address as-is, or `http://<host>:<port>`.
 * @throws AssertionError ('Could not bind server socket') when `address` is null or empty.
 */
export function httpAddressToString(address: string | AddressInfo | null): string {
  assert(address, 'Could not bind server socket');
  if (typeof address === 'string')
    return address;

  const { address: rawHost, family, port } = address;
  const host = family === 'IPv4' ? rawHost : `[${rawHost}]`;
  const isWildcard = host === '0.0.0.0' || host === '[::]';
  return `http://${isWildcard ? 'localhost' : host}:${port}`;
}