Compare commits

..

No commits in common. "4d13e72213a9ec9666ec31c32f3a7fa2168b6bff" and "d1100bac8ab51e0c3f1227e0b1640c0a135b240a" have entirely different histories.

36 changed files with 813 additions and 1494 deletions

View File

@ -19,16 +19,14 @@
<title>Playwright MCP extension</title>
</head>
<body>
<div class="header">
<h3>Playwright MCP extension</h3>
</div>
<div id="status-container"></div>
<div class="button-row">
<button id="continue-btn">Continue</button>
<button id="reject-btn">Reject</button>
</div>
<div id="tab-list-container">
<h4>Select page to expose to MCP server:</h4>
<div id="tab-list"></div>
</div>
<script src="lib/connect.js"></script>
</body>
</html>

View File

@ -19,10 +19,6 @@ import { RelayConnection, debugLog } from './relayConnection.js';
type PageMessage = {
type: 'connectToMCPRelay';
mcpRelayUrl: string;
tabId: number;
windowId: number;
} | {
type: 'getTabs';
};
class TabShareExtension {
@ -39,22 +35,22 @@ class TabShareExtension {
private _onMessage(message: PageMessage, sender: chrome.runtime.MessageSender, sendResponse: (response: any) => void) {
switch (message.type) {
case 'connectToMCPRelay':
this._connectTab(message.tabId, message.windowId, message.mcpRelayUrl!).then(
const tabId = sender.tab?.id;
if (!tabId) {
sendResponse({ success: false, error: 'No tab id' });
return true;
}
this._connectTab(sender.tab!, message.mcpRelayUrl!).then(
() => sendResponse({ success: true }),
(error: any) => sendResponse({ success: false, error: error.message }));
return true; // Return true to indicate that the response will be sent asynchronously
case 'getTabs':
this._getTabs().then(
tabs => sendResponse({ success: true, tabs, currentTabId: sender.tab?.id }),
(error: any) => sendResponse({ success: false, error: error.message }));
return true;
}
return false;
}
private async _connectTab(tabId: number, windowId: number, mcpRelayUrl: string): Promise<void> {
private async _connectTab(tab: chrome.tabs.Tab, mcpRelayUrl: string): Promise<void> {
try {
debugLog(`Connecting tab ${tabId} to bridge at ${mcpRelayUrl}`);
debugLog(`Connecting tab ${tab.id} to bridge at ${mcpRelayUrl}`);
const socket = new WebSocket(mcpRelayUrl);
await new Promise<void>((resolve, reject) => {
socket.onopen = () => resolve();
@ -62,7 +58,7 @@ class TabShareExtension {
setTimeout(() => reject(new Error('Connection timeout')), 5000);
});
const connection = new RelayConnection(socket, tabId);
const connection = new RelayConnection(socket, tab.id!);
const connectionClosed = (m: string) => {
debugLog(m);
if (this._activeConnection === connection) {
@ -75,14 +71,14 @@ class TabShareExtension {
this._activeConnection = connection;
await Promise.all([
this._setConnectedTabId(tabId),
chrome.tabs.update(tabId, { active: true }),
chrome.windows.update(windowId, { focused: true }),
this._setConnectedTabId(tab.id!),
chrome.tabs.update(tab.id!, { active: true }),
chrome.windows.update(tab.windowId, { focused: true }),
]);
debugLog(`Connected to MCP bridge`);
} catch (error: any) {
await this._setConnectedTabId(null);
debugLog(`Failed to connect tab ${tabId}:`, error.message);
debugLog(`Failed to connect tab ${tab.id}:`, error.message);
throw error;
}
}
@ -114,11 +110,6 @@ class TabShareExtension {
if (changeInfo.status === 'complete' && this._connectedTabId === tabId)
await this._setConnectedTabId(tabId);
}
private async _getTabs(): Promise<chrome.tabs.Tab[]> {
const tabs = await chrome.tabs.query({});
return tabs;
}
}
new TabShareExtension();

View File

@ -14,39 +14,25 @@
* limitations under the License.
*/
interface TabInfo {
id: number;
windowId: number;
title: string;
url: string;
favIconUrl?: string;
}
class ConnectPage {
private _tabList: HTMLElement;
private _tabListContainer: HTMLElement;
private _statusContainer: HTMLElement;
private _selectedTab: TabInfo | undefined;
constructor() {
this._tabList = document.getElementById('tab-list')!;
this._tabListContainer = document.getElementById('tab-list-container')!;
this._statusContainer = document.getElementById('status-container') as HTMLElement;
this._addButtonHandlers();
void this._loadTabs();
}
private _addButtonHandlers() {
document.addEventListener('DOMContentLoaded', async () => {
const statusContainer = document.getElementById('status-container') as HTMLElement;
const continueBtn = document.getElementById('continue-btn') as HTMLButtonElement;
const rejectBtn = document.getElementById('reject-btn') as HTMLButtonElement;
const buttonRow = document.querySelector('.button-row') as HTMLElement;
function showStatus(type: 'connected' | 'error' | 'connecting', message: string) {
const div = document.createElement('div');
div.className = `status ${type}`;
div.textContent = message;
statusContainer.replaceChildren(div);
}
const params = new URLSearchParams(window.location.search);
const mcpRelayUrl = params.get('mcpRelayUrl');
if (!mcpRelayUrl) {
buttonRow.style.display = 'none';
this._showStatus('error', 'Missing mcpRelayUrl parameter in URL.');
showStatus('error', 'Missing mcpRelayUrl parameter in URL.');
return;
}
@ -55,117 +41,30 @@ class ConnectPage {
const client = JSON.parse(params.get('client') || '{}');
clientInfo = `${client.name}/${client.version}`;
} catch (e) {
this._showStatus('error', 'Failed to parse client version.');
showStatus('error', 'Failed to parse client version.');
return;
}
this._showStatus('connecting', `MCP client "${clientInfo}" is trying to connect. Do you want to continue?`);
showStatus('connecting', `MCP client "${clientInfo}" is trying to connect. Do you want to continue?`);
rejectBtn.addEventListener('click', async () => {
buttonRow.style.display = 'none';
this._tabListContainer.style.display = 'none';
this._showStatus('error', 'Connection rejected. This tab can be closed.');
showStatus('error', 'Connection rejected. This tab can be closed.');
});
continueBtn.addEventListener('click', async () => {
buttonRow.style.display = 'none';
try {
const selectedTab = this._selectedTab;
if (!selectedTab) {
this._showStatus('error', 'Tab not selected.');
return;
}
const response = await chrome.runtime.sendMessage({
type: 'connectToMCPRelay',
mcpRelayUrl,
tabId: selectedTab.id,
windowId: selectedTab.windowId,
mcpRelayUrl
});
if (response?.success)
this._showStatus('connected', `MCP client "${clientInfo}" connected.`);
showStatus('connected', `MCP client "${clientInfo}" connected.`);
else
this._showStatus('error', response?.error || `MCP client "${clientInfo}" failed to connect.`);
showStatus('error', response?.error || `MCP client "${clientInfo}" failed to connect.`);
} catch (e) {
this._showStatus('error', `MCP client "${clientInfo}" failed to connect: ${e}`);
showStatus('error', `MCP client "${clientInfo}" failed to connect: ${e}`);
}
});
}
private async _loadTabs(): Promise<void> {
try {
const response = await chrome.runtime.sendMessage({ type: 'getTabs' });
if (response.success)
this._populateTabList(response.tabs, response.currentTabId);
else
this._showStatus('error', 'Failed to load tabs: ' + response.error);
} catch (error) {
this._showStatus('error', 'Failed to communicate with background script: ' + error);
}
}
private _populateTabList(tabs: TabInfo[], currentTabId: number): void {
this._tabList.replaceChildren();
this._selectedTab = tabs.find(tab => tab.id === currentTabId);
tabs.forEach((tab, index) => {
const tabElement = this._createTabElement(tab);
this._tabList.appendChild(tabElement);
});
}
private _createTabElement(tab: TabInfo): HTMLElement {
const disabled = tab.url.startsWith('chrome://');
const tabInfoDiv = document.createElement('div');
tabInfoDiv.className = 'tab-info';
tabInfoDiv.style.padding = '5px';
if (disabled)
tabInfoDiv.style.opacity = '0.5';
const radioButton = document.createElement('input');
radioButton.type = 'radio';
radioButton.name = 'tab-selection';
radioButton.checked = tab.id === this._selectedTab?.id;
radioButton.id = `tab-${tab.id}`;
radioButton.addEventListener('change', e => {
if (radioButton.checked)
this._selectedTab = tab;
});
if (disabled)
radioButton.disabled = true;
const favicon = document.createElement('img');
favicon.className = 'tab-favicon';
if (tab.favIconUrl)
favicon.src = tab.favIconUrl;
favicon.alt = '';
favicon.style.height = '16px';
favicon.style.width = '16px';
const title = document.createElement('span');
title.style.paddingLeft = '5px';
title.className = 'tab-title';
title.textContent = tab.title || 'Untitled';
const url = document.createElement('span');
url.style.paddingLeft = '5px';
url.className = 'tab-url';
url.textContent = tab.url;
tabInfoDiv.appendChild(radioButton);
tabInfoDiv.appendChild(favicon);
tabInfoDiv.appendChild(title);
tabInfoDiv.appendChild(url);
return tabInfoDiv;
}
private _showStatus(type: 'connected' | 'error' | 'connecting', message: string) {
const div = document.createElement('div');
div.className = `status ${type}`;
div.textContent = message;
this._statusContainer.replaceChildren(div);
}
}
new ConnectPage();

7
index.d.ts vendored
View File

@ -19,5 +19,10 @@ import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
import type { Config } from './config.js';
import type { BrowserContext } from 'playwright';
export declare function createConnection(config?: Config, contextGetter?: () => Promise<BrowserContext>): Promise<Server>;
/**
 * Handle resolved by `createConnection`: the MCP `Server` instance together
 * with an asynchronous `close()` method.
 */
export type Connection = {
server: Server;
close(): Promise<void>;
};
export declare function createConnection(config?: Config, contextGetter?: () => Promise<BrowserContext>): Promise<Connection>;
export {};

3
package-lock.json generated
View File

@ -12,7 +12,6 @@
"@modelcontextprotocol/sdk": "^1.16.0",
"commander": "^13.1.0",
"debug": "^4.4.1",
"dotenv": "^17.2.0",
"mime": "^4.0.7",
"playwright": "1.55.0-alpha-1752701791000",
"playwright-core": "1.55.0-alpha-1752701791000",
@ -35,6 +34,7 @@
"@typescript-eslint/eslint-plugin": "^8.26.1",
"@typescript-eslint/parser": "^8.26.1",
"@typescript-eslint/utils": "^8.26.1",
"dotenv": "^17.2.0",
"eslint": "^9.19.0",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-notice": "^1.0.0",
@ -1289,6 +1289,7 @@
"version": "17.2.0",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.0.tgz",
"integrity": "sha512-Q4sgBT60gzd0BB0lSyYD3xM4YxrXA9y4uBDof1JNYGzOXrQdQ6yX+7XIAqoFOGQFOTK1D3Hts5OllpxMDZFONQ==",
"dev": true,
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"

View File

@ -42,7 +42,6 @@
"@modelcontextprotocol/sdk": "^1.16.0",
"commander": "^13.1.0",
"debug": "^4.4.1",
"dotenv": "^17.2.0",
"mime": "^4.0.7",
"playwright": "1.55.0-alpha-1752701791000",
"playwright-core": "1.55.0-alpha-1752701791000",
@ -62,6 +61,7 @@
"@typescript-eslint/eslint-plugin": "^8.26.1",
"@typescript-eslint/parser": "^8.26.1",
"@typescript-eslint/utils": "^8.26.1",
"dotenv": "^17.2.0",
"eslint": "^9.19.0",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-notice": "^1.0.0",

View File

@ -217,7 +217,7 @@ async function injectCdpPort(browserConfig: FullConfig['browser']) {
(browserConfig.launchOptions as any).cdpPort = await findFreePort();
}
async function findFreePort(): Promise<number> {
async function findFreePort() {
return new Promise((resolve, reject) => {
const server = net.createServer();
server.listen(0, () => {

View File

@ -1,66 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { FullConfig } from './config.js';
import { Context } from './context.js';
import { logUnhandledError } from './log.js';
import { Response } from './response.js';
import { SessionLog } from './sessionLog.js';
import { filteredTools } from './tools.js';
import { packageJSON } from './package.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type * as mcpServer from './mcp/server.js';
import type { ServerBackend } from './mcp/server.js';
import type { Tool } from './tools/tool.js';
/**
 * `ServerBackend` implementation that serves the Playwright tools selected by
 * `filteredTools(config)` against a single shared `Context`.
 */
export class BrowserServerBackend implements ServerBackend {
  name = 'Playwright';
  version = packageJSON.version;

  private _tools: Tool[];
  private _context: Context;
  private _sessionLog: SessionLog | undefined;

  /**
   * @param config Configuration used both to pick the tools and to build the context.
   * @param browserContextFactory Handed to the `Context` constructor.
   */
  constructor(config: FullConfig, browserContextFactory: BrowserContextFactory) {
    const enabledTools = filteredTools(config);
    this._tools = enabledTools;
    this._context = new Context(enabledTools, config, browserContextFactory);
  }

  /** Creates the session log when `saveSession` is set in the context's config. */
  async initialize() {
    const { config } = this._context;
    if (config.saveSession)
      this._sessionLog = await SessionLog.create(config);
    else
      this._sessionLog = undefined;
  }

  /** Returns the schema of every enabled tool, in tool order. */
  tools(): mcpServer.ToolSchema<any>[] {
    const schemas: mcpServer.ToolSchema<any>[] = [];
    for (const tool of this._tools)
      schemas.push(tool.schema);
    return schemas;
  }

  /**
   * Runs the tool whose schema name matches `schema.name`, records the
   * response in the session log (when one exists) and returns the serialized
   * response.
   */
  async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any) {
    const toolName = schema.name;
    const response = new Response(this._context, toolName, parsedArguments);
    // The caller is expected to pass a schema obtained from tools(), hence the assertion.
    const matchingTool = this._tools.find(candidate => candidate.schema.name === toolName)!;
    await matchingTool.handle(this._context, parsedArguments, response);
    const sessionLog = this._sessionLog;
    if (sessionLog)
      await sessionLog.log(response);
    return await response.serialize();
  }

  /** Stores the client version reported by the server on the context. */
  serverInitialized(version: mcpServer.ClientVersion | undefined) {
    this._context.clientVersion = version;
  }

  /** Disposes the context without awaiting it; failures go to `logUnhandledError`. */
  serverClosed() {
    void this._context.dispose().catch(logUnhandledError);
  }
}

View File

@ -53,10 +53,9 @@ export type CLIOptions = {
const defaultConfig: FullConfig = {
browser: {
browserName: 'chromium',
isolated: true,
launchOptions: {
channel: 'chrome',
headless: false,
headless: os.platform() === 'linux' && !process.env.DISPLAY,
chromiumSandbox: true,
},
contextOptions: {
@ -130,10 +129,8 @@ export function configFromCLIOptions(cliOptions: CLIOptions): Config {
const launchOptions: LaunchOptions = {
channel,
executablePath: cliOptions.executablePath,
headless: cliOptions.headless,
};
if (cliOptions.headless !== undefined) {
launchOptions.headless = cliOptions.headless;
}
// --no-sandbox was passed, disable the sandbox
if (cliOptions.sandbox === false)

94
src/connection.ts Normal file
View File

@ -0,0 +1,94 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Server as McpServer } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema, Tool as McpTool } from '@modelcontextprotocol/sdk/types.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { Context } from './context.js';
import { Response } from './response.js';
import { allTools } from './tools.js';
import { packageJSON } from './package.js';
import { FullConfig } from './config.js';
import { SessionLog } from './sessionLog.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
/**
 * Creates an MCP server wired to the Playwright tools enabled by `config`.
 *
 * Tools whose capability starts with `core` are always exposed; any other tool
 * is exposed only when its capability is listed in `config.capabilities`.
 * Tool-call failures (unknown tool name, argument parsing errors, exceptions
 * thrown by the handler) are returned to the client as `isError` results
 * instead of being thrown.
 *
 * @param config Configuration; `capabilities` and `saveSession` are read here.
 * @param browserContextFactory Passed to the `Context` the tools operate on.
 * @returns A `Connection` holding the server and its context.
 */
export async function createConnection(config: FullConfig, browserContextFactory: BrowserContextFactory): Promise<Connection> {
  const enabledTools = allTools.filter(candidate => {
    if (candidate.capability.startsWith('core'))
      return true;
    return config.capabilities?.includes(candidate.capability);
  });
  const context = new Context(enabledTools, config, browserContextFactory);

  const serverInfo = { name: 'Playwright', version: packageJSON.version };
  const server = new McpServer(serverInfo, { capabilities: { tools: {} } });

  // Session logging is opt-in.
  let sessionLog: SessionLog | undefined;
  if (config.saveSession)
    sessionLog = await SessionLog.create(config);

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    const descriptors = enabledTools.map(({ schema }) => {
      const annotations = {
        title: schema.title,
        readOnlyHint: schema.type === 'readOnly',
        destructiveHint: schema.type === 'destructive',
        openWorldHint: true,
      };
      return {
        name: schema.name,
        description: schema.description,
        inputSchema: zodToJsonSchema(schema.inputSchema),
        annotations,
      };
    });
    return { tools: descriptors as McpTool[] };
  });

  server.setRequestHandler(CallToolRequestSchema, async request => {
    const requestedName = request.params.name;
    const asError = (...lines: string[]) => ({
      content: [{ type: 'text', text: lines.join('\n') }],
      isError: true,
    });

    const match = enabledTools.find(candidate => candidate.schema.name === requestedName);
    if (!match)
      return asError(`Tool "${requestedName}" not found`);

    try {
      const response = new Response(context, requestedName, request.params.arguments || {});
      // Parsing happens inside the try block so invalid arguments become an error result.
      const parsedArguments = match.schema.inputSchema.parse(request.params.arguments || {});
      await match.handle(context, parsedArguments, response);
      if (sessionLog)
        await sessionLog.log(response);
      return await response.serialize();
    } catch (error) {
      return asError(String(error));
    }
  });

  return new Connection(server, context);
}
/**
 * Pairs an MCP server with the `Context` its tools operate on, and keeps the
 * context's `clientVersion` in sync with what the server reports once it has
 * been initialized.
 */
export class Connection {
  readonly server: McpServer;
  readonly context: Context;

  /**
   * @param server MCP server; its `oninitialized` callback is overwritten here.
   * @param context Context that receives the client version and is closed with the server.
   */
  constructor(server: McpServer, context: Context) {
    this.server = server;
    this.context = context;
    this.server.oninitialized = () => {
      this.context.clientVersion = this.server.getClientVersion();
    };
  }

  /**
   * Closes the server and then the context.
   *
   * The context is closed even when closing the server rejects, so a failing
   * transport shutdown cannot leave the context open. The server's rejection
   * still propagates to the caller (unless closing the context also rejects,
   * in which case that later error is the one reported).
   */
  async close() {
    try {
      await this.server.close();
    } finally {
      await this.context.close();
    }
  }
}

View File

@ -17,9 +17,7 @@
import debug from 'debug';
import * as playwright from 'playwright';
import { logUnhandledError } from './log.js';
import { Tab } from './tab.js';
import { EnvironmentIntrospector } from './environmentIntrospection.js';
import type { Tool } from './tools/tool.js';
import type { FullConfig } from './config.js';
@ -38,53 +36,12 @@ export class Context {
private _videoRecordingConfig: { dir: string; size?: { width: number; height: number } } | undefined;
private _videoBaseFilename: string | undefined;
private _activePagesWithVideos: Set<playwright.Page> = new Set();
private _environmentIntrospector: EnvironmentIntrospector;
private static _allContexts: Set<Context> = new Set();
private _closeBrowserContextPromise: Promise<void> | undefined;
// Session isolation properties
readonly sessionId: string;
private _sessionStartTime: number;
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory, environmentIntrospector?: EnvironmentIntrospector) {
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory) {
this.tools = tools;
this.config = config;
this._browserContextFactory = browserContextFactory;
this._environmentIntrospector = environmentIntrospector || new EnvironmentIntrospector();
// Generate unique session ID
this._sessionStartTime = Date.now();
this.sessionId = this._generateSessionId();
testDebug(`create context with sessionId: ${this.sessionId}`);
Context._allContexts.add(this);
}
static async disposeAll() {
await Promise.all([...Context._allContexts].map(context => context.dispose()));
}
private _generateSessionId(): string {
// Create a base session ID from timestamp and random
const baseId = `${this._sessionStartTime}-${Math.random().toString(36).substr(2, 9)}`;
// If we have client version info, incorporate it
if (this.clientVersion) {
const clientInfo = `${this.clientVersion.name || 'unknown'}-${this.clientVersion.version || 'unknown'}`;
return `${clientInfo}-${baseId}`;
}
return baseId;
}
updateSessionIdWithClientInfo() {
if (this.clientVersion) {
const newSessionId = this._generateSessionId();
testDebug(`updating sessionId from ${this.sessionId} to ${newSessionId}`);
// Note: sessionId is readonly, but we can update it during initialization
(this as any).sessionId = newSessionId;
}
testDebug('create context');
}
tabs(): Tab[] {
@ -178,17 +135,10 @@ export class Context {
if (this._currentTab === tab)
this._currentTab = this._tabs[Math.min(index, this._tabs.length - 1)];
if (!this._tabs.length)
void this.closeBrowserContext();
void this.close();
}
async closeBrowserContext() {
if (!this._closeBrowserContextPromise)
this._closeBrowserContextPromise = this._closeBrowserContextImpl().catch(logUnhandledError);
await this._closeBrowserContextPromise;
this._closeBrowserContextPromise = undefined;
}
private async _closeBrowserContextImpl() {
async close() {
if (!this._browserContextPromise)
return;
@ -204,11 +154,6 @@ export class Context {
});
}
async dispose() {
await this.closeBrowserContext();
Context._allContexts.delete(this);
}
private async _setupRequestInterception(context: playwright.BrowserContext) {
if (this.config.network?.allowedOrigins?.length) {
await context.route('**', route => route.abort('blockedbyclient'));
@ -234,8 +179,6 @@ export class Context {
}
private async _setupBrowserContext(): Promise<{ browserContext: playwright.BrowserContext, close: () => Promise<void> }> {
if (this._closeBrowserContextPromise)
throw new Error('Another browser context is being closed.');
let result: { browserContext: playwright.BrowserContext, close: () => Promise<void> };
if (this._videoRecordingConfig) {
@ -245,6 +188,7 @@ export class Context {
// Use the standard browser context factory
result = await this._browserContextFactory.createContext(this.clientVersion!);
}
const { browserContext } = result;
await this._setupRequestInterception(browserContext);
for (const page of browserContext.pages())
@ -265,26 +209,15 @@ export class Context {
// For video recording, we need to create an isolated context
const browserType = playwright[this.config.browser.browserName];
// Get environment-specific browser options
const envOptions = this._environmentIntrospector.getRecommendedBrowserOptions();
const browser = await browserType.launch({
...this.config.browser.launchOptions,
...envOptions, // Include environment-detected options
handleSIGINT: false,
handleSIGTERM: false,
});
// Use environment-specific video directory if available
const videoConfig = envOptions.recordVideo ?
{ ...this._videoRecordingConfig, dir: envOptions.recordVideo.dir } :
this._videoRecordingConfig;
const contextOptions = {
...this.config.browser.contextOptions,
recordVideo: videoConfig,
// Force isolated session for video recording with session-specific storage
storageState: undefined, // Always start fresh for video recording
recordVideo: this._videoRecordingConfig,
};
const browserContext = await browser.newContext(contextOptions);
@ -304,7 +237,7 @@ export class Context {
// Force recreation of browser context to include video recording
if (this._browserContextPromise) {
void this.closeBrowserContext().then(() => {
void this.close().then(() => {
// The next call to _ensureBrowserContext will create a new context with video recording
});
}
@ -319,55 +252,6 @@ export class Context {
};
}
updateEnvironmentRoots(roots: { uri: string; name?: string }[]) {
this._environmentIntrospector.updateRoots(roots);
// Log environment change
const summary = this._environmentIntrospector.getEnvironmentSummary();
testDebug(`environment updated for session ${this.sessionId}: ${summary}`);
// If we have an active browser context, we might want to recreate it
// For now, we'll just log the change - full recreation would close existing tabs
if (this._browserContextPromise)
testDebug(`browser context exists - environment changes will apply to new contexts`);
}
getEnvironmentIntrospector(): EnvironmentIntrospector {
return this._environmentIntrospector;
}
async updateBrowserConfig(changes: {
headless?: boolean;
viewport?: { width: number; height: number };
userAgent?: string;
}): Promise<void> {
const currentConfig = { ...this.config };
// Update the configuration
if (changes.headless !== undefined) {
currentConfig.browser.launchOptions.headless = changes.headless;
}
if (changes.viewport) {
currentConfig.browser.contextOptions.viewport = changes.viewport;
}
if (changes.userAgent) {
currentConfig.browser.contextOptions.userAgent = changes.userAgent;
}
// Store the modified config
(this as any).config = currentConfig;
// Close the current browser context to force recreation with new settings
await this.closeBrowserContext();
// Clear tabs since they're attached to the old context
this._tabs = [];
this._currentTab = undefined;
testDebug(`browser config updated for session ${this.sessionId}: headless=${currentConfig.browser.launchOptions.headless}, viewport=${JSON.stringify(currentConfig.browser.contextOptions.viewport)}`);
}
async stopVideoRecording(): Promise<string[]> {
if (!this._videoRecordingConfig)
return [];

119
src/eval/loopClaude.ts Normal file
View File

@ -0,0 +1,119 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import Anthropic from '@anthropic-ai/sdk';
import debug from 'debug';
import type { Tool, ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
const model = 'claude-sonnet-4-20250514';
/**
 * Drives a Claude tool-use loop against an MCP client until the model answers
 * without requesting a tool, calls a tool named `done`, or five model turns
 * have been used.
 *
 * @param client MCP client used to list and call tools.
 * @param task Natural-language task inserted into the first user message.
 * @returns The model's text blocks joined by newlines, or the pretty-printed
 *   input of the `done` tool call.
 * @throws Error when five turns pass without a final answer.
 */
export async function runTask(client: Client, task: string): Promise<string | undefined> {
  const anthropic = new Anthropic();
  const history: Anthropic.Messages.MessageParam[] = [];

  const { tools } = await client.listTools();
  const claudeTools = tools.map(asClaudeDeclaration);

  // Seed the conversation with the task.
  history.push({ role: 'user', content: `Perform following task: ${task}.` });

  for (let attempt = 1; attempt <= 5; attempt++) {
    debug('history')(history);

    const reply = await anthropic.messages.create({
      model,
      max_tokens: 10000,
      messages: history,
      tools: claudeTools,
    });
    const blocks = reply.content;
    history.push({ role: 'assistant', content: blocks });

    const toolUses = blocks.filter(block => block.type === 'tool_use');
    if (!toolUses.length) {
      // No tool requested: the text blocks are the final answer.
      const texts = blocks.filter(block => block.type === 'text');
      return texts.map(block => block.text).join('\n');
    }

    const results: Anthropic.Messages.ToolResultBlockParam[] = [];
    for (const [position, toolUse] of toolUses.entries()) {
      if (toolUse.name === 'done')
        return JSON.stringify(toolUse.input, null, 2);

      try {
        debug('tool')(toolUse.name, toolUse.input);
        const outcome = await client.callTool({
          name: toolUse.name,
          arguments: toolUse.input as any,
        });
        const parts = (outcome.content || []) as (TextContent | ImageContent)[];
        debug('tool')(parts);
        // Only text parts are forwarded to the model.
        const text = parts.filter(part => part.type === 'text').map(part => part.text).join('\n');
        results.push({ type: 'tool_result', tool_use_id: toolUse.id, content: text });
      } catch (error) {
        debug('tool')(error);
        const reason = error instanceof Error ? error.message : String(error);
        results.push({
          type: 'tool_result',
          tool_use_id: toolUse.id,
          content: `Error while executing tool "${toolUse.name}": ${reason}\n\nPlease try to recover and complete the task.`,
          is_error: true,
        });
        // Every requested tool call still gets a result: mark the rest as skipped.
        const skipped = toolUses.slice(position + 1).map((pending): Anthropic.Messages.ToolResultBlockParam => ({
          type: 'tool_result',
          tool_use_id: pending.id,
          content: `This tool call is skipped due to previous error.`,
          is_error: true,
        }));
        results.push(...skipped);
        break;
      }
    }

    // Tool results go back to the model as a user message.
    history.push({ role: 'user', content: results });
  }

  throw new Error('Failed to perform step, max attempts reached');
}
/** Maps an MCP tool description onto the tool shape used by the Anthropic Messages API. */
function asClaudeDeclaration(tool: Tool): Anthropic.Messages.Tool {
  const { name, description, inputSchema } = tool;
  return { name, description, input_schema: inputSchema };
}

105
src/eval/loopOpenAI.ts Normal file
View File

@ -0,0 +1,105 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import OpenAI from 'openai';
import debug from 'debug';
import type { Tool, ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
const model = 'gpt-4.1';
/**
 * Drives an OpenAI tool-calling loop against an MCP client until the model
 * answers without requesting a tool, calls a tool named `done`, or five model
 * turns have been used.
 *
 * @param client MCP client used to list and call tools.
 * @param task Natural-language task inserted into the first user message.
 * @returns The model's final text (`undefined` when the message has no
 *   content), or the pretty-printed arguments of the `done` tool call.
 * @throws Error when five turns pass without a final answer.
 */
export async function runTask(client: Client, task: string): Promise<string | undefined> {
  const openai = new OpenAI();
  const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
    {
      role: 'user',
      content: `Perform following task: ${task}. Once the task is complete, call the "done" tool.`
    }
  ];

  const { tools } = await client.listTools();
  // The tool declarations do not change between turns, so build them once.
  const openAITools = tools.map(tool => asOpenAIDeclaration(tool));

  for (let iteration = 0; iteration < 5; ++iteration) {
    debug('history')(messages);

    const response = await openai.chat.completions.create({
      model,
      messages,
      tools: openAITools,
      tool_choice: 'auto'
    });

    const message = response.choices[0].message;
    // The content is already a string; stringifying it again would wrap it in
    // quotes and escape its newlines.
    if (!message.tool_calls?.length)
      return message.content ?? undefined;

    messages.push({
      role: 'assistant',
      tool_calls: message.tool_calls
    });

    for (const [index, toolCall] of message.tool_calls.entries()) {
      const functionCall = toolCall.function;

      if (functionCall.name === 'done') {
        // `arguments` is a JSON-encoded string: decode it before pretty-printing
        // so the result is readable JSON rather than a doubly encoded string.
        try {
          return JSON.stringify(JSON.parse(functionCall.arguments), null, 2);
        } catch {
          // Not valid JSON: hand back the raw text instead of failing the task.
          return functionCall.arguments;
        }
      }

      try {
        debug('tool')(functionCall.name, functionCall.arguments);
        const toolResponse = await client.callTool({
          name: functionCall.name,
          arguments: JSON.parse(functionCall.arguments)
        });
        const content = (toolResponse.content || []) as (TextContent | ImageContent)[];
        debug('tool')(content);
        // Only text parts are forwarded to the model.
        const text = content.filter(part => part.type === 'text').map(part => part.text).join('\n');
        messages.push({
          role: 'tool',
          tool_call_id: toolCall.id,
          content: text,
        });
      } catch (error) {
        debug('tool')(error);
        messages.push({
          role: 'tool',
          tool_call_id: toolCall.id,
          content: `Error while executing tool "${functionCall.name}": ${error instanceof Error ? error.message : String(error)}\n\nPlease try to recover and complete the task.`,
        });
        // Every requested tool call still gets a reply: mark the rest as skipped.
        for (const ignoredToolCall of message.tool_calls.slice(index + 1)) {
          messages.push({
            role: 'tool',
            tool_call_id: ignoredToolCall.id,
            content: `This tool call is skipped due to previous error.`,
          });
        }
        break;
      }
    }
  }
  throw new Error('Failed to perform step, max attempts reached');
}
/** Maps an MCP tool description onto an OpenAI chat-completions function tool. */
function asOpenAIDeclaration(tool: Tool): OpenAI.Chat.Completions.ChatCompletionTool {
  const { name, description, inputSchema: parameters } = tool;
  return {
    type: 'function',
    function: { name, description, parameters },
  };
}

View File

@ -23,17 +23,14 @@ import dotenv from 'dotenv';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { program } from 'commander';
import { OpenAIDelegate } from './loopOpenAI.js';
import { ClaudeDelegate } from './loopClaude.js';
import { runTask } from './loop.js';
import type { LLMDelegate } from './loop.js';
import { runTask as runTaskOpenAI } from './loopOpenAI.js';
import { runTask as runTaskClaude } from './loopClaude.js';
dotenv.config();
const __filename = url.fileURLToPath(import.meta.url);
async function run(delegate: LLMDelegate) {
async function run(runTask: (client: Client, task: string) => Promise<string | undefined>) {
const transport = new StdioClientTransport({
command: 'node',
args: [
@ -49,11 +46,10 @@ async function run(delegate: LLMDelegate) {
await client.connect(transport);
await client.ping();
for (const task of tasks) {
const messages = await runTask(delegate, client, task);
for (const message of messages)
console.log(`${message.role}: ${message.content}`);
}
let lastResult: string | undefined;
for (const task of tasks)
lastResult = await runTask(client, task);
console.log(lastResult);
await client.close();
}
@ -65,8 +61,8 @@ program
.option('--model <model>', 'model to use')
.action(async options => {
if (options.model === 'claude')
await run(new ClaudeDelegate());
await run(runTaskClaude);
else
await run(new OpenAIDelegate());
await run(runTaskOpenAI);
});
void program.parseAsync(process.argv);

View File

@ -27,12 +27,11 @@ import { spawn } from 'child_process';
import { WebSocket, WebSocketServer } from 'ws';
import debug from 'debug';
import * as playwright from 'playwright';
import { httpAddressToString, startHttpServer } from '../transport.js';
import { BrowserContextFactory } from '../browserContextFactory.js';
// @ts-ignore
const { registry } = await import('playwright-core/lib/server/registry/index');
import { httpAddressToString, startHttpServer } from '../httpServer.js';
import { logUnhandledError } from '../log.js';
import { ManualPromise } from '../manualPromise.js';
import type { BrowserContextFactory } from '../browserContextFactory.js';
import type websocket from 'ws';
const debugLogger = debug('pw:mcp:relay');
@ -67,7 +66,8 @@ export class CDPRelayServer {
sessionId: string;
} | undefined;
private _nextSessionId: number = 1;
private _extensionConnectionPromise!: ManualPromise<void>;
private _extensionConnectionPromise: Promise<void>;
private _extensionConnectionResolve: (() => void) | null = null;
constructor(server: http.Server, browserChannel: string) {
this._wsHost = httpAddressToString(server.address()).replace(/^http/, 'ws');
@ -77,7 +77,9 @@ export class CDPRelayServer {
this._cdpPath = `/cdp/${uuid}`;
this._extensionPath = `/extension/${uuid}`;
this._resetExtensionConnection();
this._extensionConnectionPromise = new Promise(resolve => {
this._extensionConnectionResolve = resolve;
});
this._wss = new WebSocketServer({ server });
this._wss.on('connection', this._onConnection.bind(this));
}
@ -165,15 +167,15 @@ export class CDPRelayServer {
private _closeExtensionConnection(reason: string) {
this._extensionConnection?.close(reason);
this._extensionConnectionPromise.reject(new Error(reason));
this._resetExtensionConnection();
}
private _resetExtensionConnection() {
this._connectedTabInfo = undefined;
this._extensionConnection = null;
this._extensionConnectionPromise = new ManualPromise();
void this._extensionConnectionPromise.catch(logUnhandledError);
this._extensionConnectionPromise = new Promise(resolve => {
this._extensionConnectionResolve = resolve;
});
}
private _closePlaywrightConnection(reason: string) {
@ -196,7 +198,7 @@ export class CDPRelayServer {
this._closePlaywrightConnection(`Extension disconnected: ${reason}`);
};
this._extensionConnection.onmessage = this._handleExtensionMessage.bind(this);
this._extensionConnectionPromise.resolve();
this._extensionConnectionResolve?.();
}
private _handleExtensionMessage(method: string, params: any) {
@ -305,9 +307,7 @@ class ExtensionContextFactory implements BrowserContextFactory {
const browser = await this._browserPromise;
return {
browserContext: browser.contexts()[0],
close: async () => {
debugLogger('close() called for browser context, ignoring');
}
close: async () => {}
};
}
@ -322,10 +322,10 @@ class ExtensionContextFactory implements BrowserContextFactory {
}
}
export async function startCDPRelayServer(browserChannel: string, abortController: AbortController) {
const httpServer = await startHttpServer({});
export async function startCDPRelayServer(port: number, browserChannel: string) {
const httpServer = await startHttpServer({ port });
const cdpRelayServer = new CDPRelayServer(httpServer, browserChannel);
abortController.signal.addEventListener('abort', () => cdpRelayServer.stop());
process.on('exit', () => cdpRelayServer.stop());
debugLogger(`CDP relay server started, extension endpoint: ${cdpRelayServer.extensionEndpoint()}.`);
return new ExtensionContextFactory(cdpRelayServer);
}

View File

@ -14,14 +14,24 @@
* limitations under the License.
*/
import { resolveCLIConfig } from '../config.js';
import { startHttpServer, startHttpTransport, startStdioTransport } from '../transport.js';
import { Server } from '../server.js';
import { startCDPRelayServer } from './cdpRelay.js';
import { BrowserServerBackend } from '../browserServerBackend.js';
import * as mcpTransport from '../mcp/transport.js';
import type { FullConfig } from '../config.js';
import type { CLIOptions } from '../config.js';
export async function runWithExtension(config: FullConfig, abortController: AbortController) {
const contextFactory = await startCDPRelayServer(config.browser.launchOptions.channel || 'chrome', abortController);
const serverBackendFactory = () => new BrowserServerBackend(config, contextFactory);
await mcpTransport.start(serverBackendFactory, config.server);
export async function runWithExtension(options: CLIOptions) {
const config = await resolveCLIConfig(options);
const contextFactory = await startCDPRelayServer(9225, config.browser.launchOptions.channel || 'chrome');
const server = new Server(config, contextFactory);
server.setupExitWatchdog();
if (options.port !== undefined) {
const httpServer = await startHttpServer({ port: options.port });
startHttpTransport(httpServer, server);
} else {
await startStdioTransport(server);
}
}

View File

@ -14,31 +14,219 @@
* limitations under the License.
*/
import assert from 'assert';
import fs from 'fs';
import path from 'path';
import http from 'http';
import net from 'net';
import type * as net from 'net';
import mime from 'mime';
export async function startHttpServer(config: { host?: string, port?: number }): Promise<http.Server> {
const { host, port } = config;
const httpServer = http.createServer();
await new Promise<void>((resolve, reject) => {
httpServer.on('error', reject);
httpServer.listen(port, host, () => {
resolve();
httpServer.removeListener('error', reject);
});
});
return httpServer;
import { ManualPromise } from './manualPromise.js';
export type ServerRouteHandler = (request: http.IncomingMessage, response: http.ServerResponse) => void;
export type Transport = {
sendEvent?: (method: string, params: any) => void;
close?: () => void;
onconnect: () => void;
dispatch: (method: string, params: any) => Promise<any>;
onclose: () => void;
};
export class HttpServer {
private _server: http.Server;
private _urlPrefixPrecise: string = '';
private _urlPrefixHumanReadable: string = '';
private _port: number = 0;
private _routes: { prefix?: string, exact?: string, handler: ServerRouteHandler }[] = [];
constructor() {
this._server = http.createServer(this._onRequest.bind(this));
decorateServer(this._server);
}
export function httpAddressToString(address: string | net.AddressInfo | null): string {
assert(address, 'Could not bind server socket');
if (typeof address === 'string')
return address;
const resolvedPort = address.port;
let resolvedHost = address.family === 'IPv4' ? address.address : `[${address.address}]`;
if (resolvedHost === '0.0.0.0' || resolvedHost === '[::]')
resolvedHost = 'localhost';
return `http://${resolvedHost}:${resolvedPort}`;
server() {
return this._server;
}
/**
 * Registers `handler` for requests whose URL pathname starts with `prefix`.
 * Routes are appended to the route list in call order.
 */
routePrefix(prefix: string, handler: ServerRouteHandler) {
this._routes.push({ prefix, handler });
}
/**
 * Registers `handler` for requests whose URL pathname is exactly `path`.
 * Routes are appended to the route list in call order.
 */
routePath(path: string, handler: ServerRouteHandler) {
this._routes.push({ exact: path, handler });
}
/**
 * Returns the port recorded by `start()`. The field is initialised to 0 and
 * is only updated when the bound address is not a string.
 */
port(): number {
return this._port;
}
/**
 * Makes one attempt to listen on `port`/`host`. Resolves once the server
 * emits `listening`; rejects with the server's `error` event if that fires
 * first. The temporary `error` listener is always removed afterwards.
 */
private async _tryStart(port: number | undefined, host: string) {
  const failure = new ManualPromise();
  const onError = (error: Error) => failure.reject(error);
  this._server.on('error', onError);
  try {
    this._server.listen(port, host);
    const listening = new Promise(cb => this._server!.once('listening', cb));
    await Promise.race([listening, failure]);
  } finally {
    this._server.removeListener('error', onError);
  }
}
/**
 * Starts listening and records the resulting URL prefixes.
 *
 * With `preferredPort`, that port is tried first; if the attempt fails with
 * an error whose message contains `EADDRINUSE`, a second attempt is made with
 * no port specified. Any other failure is rethrown. Without `preferredPort`,
 * `port` is used directly. `host` defaults to `'localhost'`.
 */
async start(options: { port?: number, preferredPort?: number, host?: string } = {}): Promise<void> {
  const host = options.host || 'localhost';
  if (!options.preferredPort) {
    await this._tryStart(options.port, host);
  } else {
    try {
      await this._tryStart(options.preferredPort, host);
    } catch (e: any) {
      const addressInUse = e && e.message && e.message.includes('EADDRINUSE');
      if (!addressInUse)
        throw e;
      await this._tryStart(undefined, host);
    }
  }

  const address = this._server.address();
  if (typeof address === 'string') {
    this._urlPrefixPrecise = address;
    this._urlPrefixHumanReadable = address;
    return;
  }

  const boundPort = address!.port;
  this._port = boundPort;
  // IPv6 literals have to be bracketed inside a URL.
  const boundHost = address!.family === 'IPv4' ? address!.address : `[${address!.address}]`;
  this._urlPrefixPrecise = `http://${boundHost}:${boundPort}`;
  this._urlPrefixHumanReadable = `http://${host}:${boundPort}`;
}
/**
 * Closes the underlying server and resolves once its `close` callback runs.
 * The callback's argument becomes the resolution value, so a close error does
 * not reject the returned promise.
 */
async stop() {
await new Promise(cb => this._server!.close(cb));
}
/**
 * Returns the URL prefix recorded by `start()`: the one built from the
 * requested host name for `'human-readable'`, otherwise the one built from
 * the address the socket actually bound to.
 */
urlPrefix(purpose: 'human-readable' | 'precise'): string {
  if (purpose === 'human-readable')
    return this._urlPrefixHumanReadable;
  return this._urlPrefixPrecise;
}
/**
 * Sends `absoluteFilePath` as the response, after applying any extra
 * `headers`. Requests that carry a `Range` header are answered through
 * `_serveRangeFile`, all others through `_serveFile`.
 *
 * @returns `true` when serving was started, `false` if anything threw
 *   synchronously along the way.
 */
serveFile(request: http.IncomingMessage, response: http.ServerResponse, absoluteFilePath: string, headers?: { [name: string]: string }): boolean {
  try {
    if (headers) {
      for (const name of Object.keys(headers))
        response.setHeader(name, headers[name]);
    }
    const isRangeRequest = !!request.headers.range;
    if (isRangeRequest)
      this._serveRangeFile(request, response, absoluteFilePath);
    else
      this._serveFile(response, absoluteFilePath);
    return true;
  } catch (e) {
    return false;
  }
}
/**
 * Reads the whole file synchronously and sends it as a 200 response. The
 * content type is derived from the file extension and falls back to
 * `application/octet-stream` when the extension is unknown.
 */
_serveFile(response: http.ServerResponse, absoluteFilePath: string) {
  const body = fs.readFileSync(absoluteFilePath);
  const extension = path.extname(absoluteFilePath);
  const mimeType = mime.getType(extension) || 'application/octet-stream';
  response.statusCode = 200;
  response.setHeader('Content-Type', mimeType);
  response.setHeader('Content-Length', body.byteLength);
  response.end(body);
}
/**
 * Answers a single-range `Range: bytes=...` request for `absoluteFilePath`.
 *
 * Responses:
 * - 400 when the header is missing, is not a `bytes=` range, lists several
 *   ranges, or names neither a start nor an end (`bytes=-`);
 * - 416 when the requested range cannot be satisfied for the file's size;
 * - 206 with the requested slice streamed from disk otherwise.
 *
 * A suffix range (`bytes=-N`) longer than the file is served as the whole
 * file, as RFC 7233 section 2.1 prescribes, instead of producing a negative
 * start offset.
 */
_serveRangeFile(request: http.IncomingMessage, response: http.ServerResponse, absoluteFilePath: string) {
  const range = request.headers.range;
  if (!range || !range.startsWith('bytes=') || range.includes(', ') || [...range].filter(char => char === '-').length !== 1) {
    response.statusCode = 400;
    return response.end('Bad request');
  }

  // Parse the range header: https://datatracker.ietf.org/doc/html/rfc7233#section-2.1
  const [startStr, endStr] = range.replace(/bytes=/, '').split('-');
  if (startStr === '' && endStr === '') {
    // "bytes=-" carries no position at all; unary plus would silently turn
    // both empty strings into 0 and serve the first byte.
    response.statusCode = 400;
    return response.end('Bad request');
  }

  // Both start and end (when passing to fs.createReadStream) and the range header are inclusive and start counting at 0.
  let start: number;
  let end: number;
  const size = fs.statSync(absoluteFilePath).size;
  if (endStr === '') {
    // No end specified: serve from start to the end of the file.
    start = +startStr;
    end = size - 1;
  } else if (startStr === '') {
    // Suffix range: the last N bytes. Clamp so that N > size selects the
    // whole file rather than a negative offset (NaN stays NaN and is
    // rejected below).
    start = Math.max(0, size - +endStr);
    end = size - 1;
  } else {
    start = +startStr;
    end = +endStr;
  }

  // Handle unavailable range request
  if (Number.isNaN(start) || Number.isNaN(end) || start >= size || end >= size || start > end) {
    // Return the 416 Range Not Satisfiable: https://datatracker.ietf.org/doc/html/rfc7233#section-4.4
    response.writeHead(416, {
      'Content-Range': `bytes */${size}`
    });
    return response.end();
  }

  // Sending Partial Content: https://datatracker.ietf.org/doc/html/rfc7233#section-4.1
  response.writeHead(206, {
    'Content-Range': `bytes ${start}-${end}/${size}`,
    'Accept-Ranges': 'bytes',
    'Content-Length': end - start + 1,
    // Same fallback as _serveFile: getType() returns null for unknown extensions.
    'Content-Type': mime.getType(path.extname(absoluteFilePath)) || 'application/octet-stream',
  });

  const readable = fs.createReadStream(absoluteFilePath, { start, end });
  readable.pipe(response);
}
/**
 * Request listener of the underlying HTTP server.
 *
 * `OPTIONS` requests are answered with an empty 200. Every other request is
 * handed to the first registered route whose exact path equals, or whose
 * prefix starts, the URL pathname; with no match the response is an empty
 * 404. Anything thrown while routing ends the response without a body.
 */
private _onRequest(request: http.IncomingMessage, response: http.ServerResponse) {
  if (request.method === 'OPTIONS') {
    response.writeHead(200);
    response.end();
    return;
  }

  request.on('error', () => response.end());
  try {
    if (!request.url) {
      response.end();
      return;
    }
    // request.url is only path + query, so give URL a dummy origin to parse against.
    const { pathname } = new URL('http://localhost' + request.url);
    const matched = this._routes.find(route =>
      (!!route.exact && pathname === route.exact) ||
      (!!route.prefix && pathname.startsWith(route.prefix)));
    if (matched) {
      matched.handler(request, response);
      return;
    }
    response.statusCode = 404;
    response.end();
  } catch (e) {
    response.end();
  }
}
}
/**
 * Tracks every socket accepted by `server` and replaces `server.close` with a
 * version that destroys all sockets still being tracked before delegating to
 * the original `close`. A socket stops being tracked once it emits `close`.
 */
function decorateServer(server: net.Server) {
  const liveSockets = new Set<net.Socket>();
  server.on('connection', socket => {
    liveSockets.add(socket);
    socket.once('close', () => liveSockets.delete(socket));
  });

  const originalClose = server.close;
  server.close = (callback?: (err?: Error) => void) => {
    liveSockets.forEach(socket => socket.destroy());
    liveSockets.clear();
    return originalClose.call(server, callback);
  };
}

View File

@ -14,20 +14,18 @@
* limitations under the License.
*/
import { BrowserServerBackend } from './browserServerBackend.js';
import { createConnection as createConnectionImpl } from './connection.js';
import { resolveConfig } from './config.js';
import { contextFactory } from './browserContextFactory.js';
import * as mcpServer from './mcp/server.js';
import type { Connection } from '../index.js';
import type { Config } from '../config.js';
import type { BrowserContext } from 'playwright';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
export async function createConnection(userConfig: Config = {}, contextGetter?: () => Promise<BrowserContext>): Promise<Server> {
export async function createConnection(userConfig: Config = {}, contextGetter?: () => Promise<BrowserContext>): Promise<Connection> {
const config = await resolveConfig(userConfig);
const factory = contextGetter ? new SimpleBrowserContextFactory(contextGetter) : contextFactory(config.browser);
return mcpServer.createServer(new BrowserServerBackend(config, factory));
return createConnectionImpl(config, factory);
}
class SimpleBrowserContextFactory implements BrowserContextFactory {

View File

@ -1,108 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import debug from 'debug';
import type { Tool, ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js';
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
/**
 * A tool invocation requested by the model: the tool `name`, the `arguments`
 * to call it with, and the `id` that the matching tool result refers back to.
 */
export type LLMToolCall = {
name: string;
arguments: any;
id: string;
};
/** Provider-independent description of a tool offered to the model. */
export type LLMTool = {
name: string;
description: string;
inputSchema: any;
};
/**
 * One entry of the provider-independent conversation history, discriminated
 * by `role`. Assistant messages may carry tool calls; tool messages carry the
 * result for the call identified by `toolCallId`.
 */
export type LLMMessage =
| { role: 'user'; content: string }
| { role: 'assistant'; content: string; toolCalls?: LLMToolCall[] }
| { role: 'tool'; toolCallId: string; content: string; isError?: boolean };
/** Message history plus the tools offered to the model. */
export type LLMConversation = {
messages: LLMMessage[];
tools: LLMTool[];
};
/**
 * Provider-specific half of the task loop; `runTask` drives it through these
 * four calls.
 */
export interface LLMDelegate {
// Builds the initial conversation for `task` with the given MCP tools.
createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation;
// Asks the model for its next step and returns the tool calls it requested.
makeApiCall(conversation: LLMConversation): Promise<LLMToolCall[]>;
// Records the results of executed tool calls in the conversation.
addToolResults(conversation: LLMConversation, results: Array<{ toolCallId: string; content: string; isError?: boolean }>): void;
// Returns null for ordinary tool calls; runTask treats any other value as "task finished".
checkDoneToolCall(toolCall: LLMToolCall): string | null;
}
/**
 * Runs `task` against the MCP `client`, letting the model behind `delegate`
 * choose which tools to call.
 *
 * Each iteration asks the delegate for tool calls, executes them in order
 * through `client.callTool`, and feeds the textual results back. If a call
 * throws, the error is reported to the model and the remaining calls of that
 * batch are marked as skipped.
 *
 * @param delegate Provider adapter used to talk to the model.
 * @param client Connected MCP client whose tools are offered to the model.
 * @param task Natural-language task description.
 * @param oneShot When true, return after the first round of tool calls instead
 *   of waiting for the model to call "done".
 * @returns The conversation messages accumulated so far.
 * @throws Error when the model requests no tool calls, or when the iteration
 *   limit is reached without completion.
 */
export async function runTask(delegate: LLMDelegate, client: Client, task: string, oneShot: boolean = false): Promise<LLMMessage[]> {
  const maxIterations = 5;
  const { tools } = await client.listTools();
  const taskContent = oneShot ? `Perform following task: ${task}.` : `Perform following task: ${task}. Once the task is complete, call the "done" tool.`;
  const conversation = delegate.createConversation(taskContent, tools, oneShot);

  for (let iteration = 0; iteration < maxIterations; ++iteration) {
    debug('history')('Making API call for iteration', iteration);
    const toolCalls = await delegate.makeApiCall(conversation);
    if (toolCalls.length === 0)
      throw new Error('Call the "done" tool when the task is complete.');

    const toolResults: Array<{ toolCallId: string; content: string; isError?: boolean }> = [];
    for (const [index, toolCall] of toolCalls.entries()) {
      const doneResult = delegate.checkDoneToolCall(toolCall);
      if (doneResult !== null) {
        // Calls that ran earlier in this batch already produced output; record
        // it so the returned history does not silently lose those results.
        if (toolResults.length > 0)
          delegate.addToolResults(conversation, toolResults);
        return conversation.messages;
      }

      const { name, arguments: args, id } = toolCall;
      try {
        debug('tool')(name, args);
        const response = await client.callTool({
          name,
          arguments: args,
        });
        const responseContent = (response.content || []) as (TextContent | ImageContent)[];
        debug('tool')(responseContent);
        // Only text parts are forwarded to the model; image parts are dropped.
        const text = responseContent.filter(part => part.type === 'text').map(part => part.text).join('\n');
        toolResults.push({
          toolCallId: id,
          content: text,
        });
      } catch (error) {
        debug('tool')(error);
        toolResults.push({
          toolCallId: id,
          content: `Error while executing tool "${name}": ${error instanceof Error ? error.message : String(error)}\n\nPlease try to recover and complete the task.`,
          isError: true,
        });
        // Skip remaining tool calls for this iteration, but still give each a
        // result entry so that every requested call is answered.
        for (const remainingToolCall of toolCalls.slice(index + 1)) {
          toolResults.push({
            toolCallId: remainingToolCall.id,
            content: `This tool call is skipped due to previous error.`,
            isError: true,
          });
        }
        break;
      }
    }

    delegate.addToolResults(conversation, toolResults);
    if (oneShot)
      return conversation.messages;
  }
  throw new Error('Failed to perform step, max attempts reached');
}

View File

@ -1,177 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type Anthropic from '@anthropic-ai/sdk';
import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
const model = 'claude-sonnet-4-20250514';
/**
 * `LLMDelegate` implementation backed by the Anthropic Messages API.
 *
 * The generic conversation is converted to Claude's message format on every
 * call; the SDK itself is imported lazily on first use.
 */
export class ClaudeDelegate implements LLMDelegate {
  private _anthropic: Anthropic | undefined;

  /** Returns the shared Anthropic client, creating it on first use. */
  async anthropic(): Promise<Anthropic> {
    if (!this._anthropic) {
      const anthropic = await import('@anthropic-ai/sdk');
      this._anthropic = new anthropic.Anthropic();
    }
    return this._anthropic;
  }

  /**
   * Builds the initial conversation: one user message holding `task`, plus the
   * MCP tools. Unless `oneShot` is set, a parameterless "done" tool is added
   * for the model to signal completion.
   */
  createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
    const llmTools: LLMTool[] = tools.map(tool => ({
      name: tool.name,
      description: tool.description || '',
      inputSchema: tool.inputSchema,
    }));

    if (!oneShot) {
      llmTools.push({
        name: 'done',
        description: 'Call this tool when the task is complete.',
        inputSchema: {
          type: 'object',
          properties: {},
        },
      });
    }

    return {
      messages: [{
        role: 'user',
        content: task
      }],
      tools: llmTools,
    };
  }

  /**
   * Sends the conversation to Claude, appends the assistant reply to
   * `conversation.messages`, and returns the tool calls that reply contains.
   */
  async makeApiCall(conversation: LLMConversation): Promise<LLMToolCall[]> {
    // Convert generic messages to Claude format
    const claudeMessages: Anthropic.Messages.MessageParam[] = [];

    for (const message of conversation.messages) {
      if (message.role === 'user') {
        claudeMessages.push({
          role: 'user',
          content: message.content
        });
      } else if (message.role === 'assistant') {
        const content: Anthropic.Messages.ContentBlock[] = [];

        // Add text content
        if (message.content) {
          content.push({
            type: 'text',
            text: message.content,
            citations: []
          });
        }

        // Add tool calls
        if (message.toolCalls) {
          for (const toolCall of message.toolCalls) {
            content.push({
              type: 'tool_use',
              id: toolCall.id,
              name: toolCall.name,
              input: toolCall.arguments
            });
          }
        }

        claudeMessages.push({
          role: 'assistant',
          content
        });
      } else if (message.role === 'tool') {
        // Claude expects tool results inside a user message; consecutive
        // results are merged into the same message.
        const lastMessage = claudeMessages[claudeMessages.length - 1];
        const toolResult: Anthropic.Messages.ToolResultBlockParam = {
          type: 'tool_result',
          tool_use_id: message.toolCallId,
          content: message.content,
          is_error: message.isError,
        };

        if (lastMessage && lastMessage.role === 'user' && Array.isArray(lastMessage.content)) {
          // Add to existing tool results message
          (lastMessage.content as Anthropic.Messages.ToolResultBlockParam[]).push(toolResult);
        } else {
          // Create new tool results message
          claudeMessages.push({
            role: 'user',
            content: [toolResult]
          });
        }
      }
    }

    // Convert generic tools to Claude format
    const claudeTools: Anthropic.Messages.Tool[] = conversation.tools.map(tool => ({
      name: tool.name,
      description: tool.description,
      input_schema: tool.inputSchema,
    }));

    const anthropic = await this.anthropic();
    const response = await anthropic.messages.create({
      model,
      max_tokens: 10000,
      messages: claudeMessages,
      tools: claudeTools,
    });

    // Extract tool calls and add assistant message to generic conversation
    const toolCalls = response.content.filter(block => block.type === 'tool_use') as Anthropic.Messages.ToolUseBlock[];
    const textContent = response.content.filter(block => block.type === 'text').map(block => (block as Anthropic.Messages.TextBlock).text).join('');

    const llmToolCalls: LLMToolCall[] = toolCalls.map(toolCall => ({
      name: toolCall.name,
      arguments: toolCall.input as any,
      id: toolCall.id,
    }));

    // Add assistant message to generic conversation
    conversation.messages.push({
      role: 'assistant',
      content: textContent,
      toolCalls: llmToolCalls.length > 0 ? llmToolCalls : undefined
    });

    return llmToolCalls;
  }

  /** Appends one `tool` message per result to the conversation. */
  addToolResults(
    conversation: LLMConversation,
    results: Array<{ toolCallId: string; content: string; isError?: boolean }>
  ): void {
    for (const result of results) {
      conversation.messages.push({
        role: 'tool',
        toolCallId: result.toolCallId,
        content: result.content,
        isError: result.isError,
      });
    }
  }

  /**
   * Returns null unless `toolCall` is the "done" tool. For "done", returns its
   * string `result` argument, or an empty string when none was supplied. The
   * declared "done" schema has no properties, so the empty string is the
   * normal case; returning it rather than `undefined` keeps the
   * `string | null` contract honest.
   */
  checkDoneToolCall(toolCall: LLMToolCall): string | null {
    if (toolCall.name !== 'done')
      return null;
    const result = (toolCall.arguments as { result?: unknown } | null | undefined)?.result;
    return typeof result === 'string' ? result : '';
  }
}

View File

@ -1,168 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type OpenAI from 'openai';
import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
const model = 'gpt-4.1';
/**
 * `LLMDelegate` implementation backed by the OpenAI chat completions API.
 *
 * The generic conversation is converted to OpenAI's message format on every
 * call; the SDK itself is imported lazily on first use.
 */
export class OpenAIDelegate implements LLMDelegate {
  private _openai: OpenAI | undefined;

  /** Returns the shared OpenAI client, creating it on first use. */
  async openai(): Promise<OpenAI> {
    if (!this._openai) {
      const oai = await import('openai');
      this._openai = new oai.OpenAI();
    }
    return this._openai;
  }

  /**
   * Builds the initial conversation: one user message holding `task`, plus the
   * MCP tools. Unless `oneShot` is set, a parameterless "done" tool is added
   * for the model to signal completion.
   */
  createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
    const genericTools: LLMTool[] = tools.map(tool => ({
      name: tool.name,
      description: tool.description || '',
      inputSchema: tool.inputSchema,
    }));

    if (!oneShot) {
      genericTools.push({
        name: 'done',
        description: 'Call this tool when the task is complete.',
        inputSchema: {
          type: 'object',
          properties: {},
        },
      });
    }

    return {
      messages: [{
        role: 'user',
        content: task
      }],
      tools: genericTools,
    };
  }

  /**
   * Sends the conversation to OpenAI, appends the assistant reply to
   * `conversation.messages`, and returns the tool calls that reply contains.
   *
   * @throws Error when the response has no choices, or when a tool call's
   *   non-empty arguments string is not valid JSON.
   */
  async makeApiCall(conversation: LLMConversation): Promise<LLMToolCall[]> {
    // Convert generic messages to OpenAI format
    const openaiMessages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [];

    for (const message of conversation.messages) {
      if (message.role === 'user') {
        openaiMessages.push({
          role: 'user',
          content: message.content
        });
      } else if (message.role === 'assistant') {
        const toolCalls: OpenAI.Chat.Completions.ChatCompletionMessageToolCall[] = [];

        if (message.toolCalls) {
          for (const toolCall of message.toolCalls) {
            toolCalls.push({
              id: toolCall.id,
              type: 'function',
              function: {
                name: toolCall.name,
                arguments: JSON.stringify(toolCall.arguments)
              }
            });
          }
        }

        const assistantMessage: OpenAI.Chat.Completions.ChatCompletionAssistantMessageParam = {
          role: 'assistant'
        };

        // Only set the optional fields that actually carry something.
        if (message.content)
          assistantMessage.content = message.content;

        if (toolCalls.length > 0)
          assistantMessage.tool_calls = toolCalls;

        openaiMessages.push(assistantMessage);
      } else if (message.role === 'tool') {
        openaiMessages.push({
          role: 'tool',
          tool_call_id: message.toolCallId,
          content: message.content,
        });
      }
    }

    // Convert generic tools to OpenAI format
    const openaiTools: OpenAI.Chat.Completions.ChatCompletionTool[] = conversation.tools.map(tool => ({
      type: 'function',
      function: {
        name: tool.name,
        description: tool.description,
        parameters: tool.inputSchema,
      },
    }));

    const openai = await this.openai();
    const response = await openai.chat.completions.create({
      model,
      messages: openaiMessages,
      tools: openaiTools,
      tool_choice: 'auto'
    });

    const message = response.choices[0]?.message;
    if (!message)
      throw new Error('OpenAI response contained no choices');

    // Extract tool calls and add assistant message to generic conversation
    const toolCalls = message.tool_calls || [];
    const genericToolCalls: LLMToolCall[] = toolCalls.map(toolCall => {
      const functionCall = toolCall.function;
      // The arguments arrive as a JSON string; an empty string stands for "no
      // arguments" and must not be fed to JSON.parse.
      const rawArguments = functionCall.arguments;
      return {
        name: functionCall.name,
        arguments: rawArguments && rawArguments.trim() ? JSON.parse(rawArguments) : {},
        id: toolCall.id,
      };
    });

    // Add assistant message to generic conversation
    conversation.messages.push({
      role: 'assistant',
      content: message.content || '',
      toolCalls: genericToolCalls.length > 0 ? genericToolCalls : undefined
    });

    return genericToolCalls;
  }

  /** Appends one `tool` message per result to the conversation. */
  addToolResults(
    conversation: LLMConversation,
    results: Array<{ toolCallId: string; content: string; isError?: boolean }>
  ): void {
    for (const result of results) {
      conversation.messages.push({
        role: 'tool',
        toolCallId: result.toolCallId,
        content: result.content,
        isError: result.isError,
      });
    }
  }

  /**
   * Returns null unless `toolCall` is the "done" tool. For "done", returns its
   * string `result` argument, or an empty string when none was supplied. The
   * declared "done" schema has no properties, so the empty string is the
   * normal case; returning it rather than `undefined` keeps the
   * `string | null` contract honest.
   */
  checkDoneToolCall(toolCall: LLMToolCall): string | null {
    if (toolCall.name !== 'done')
      return null;
    const result = (toolCall.arguments as { result?: unknown } | null | undefined)?.result;
    return typeof result === 'string' ? result : '';
  }
}

View File

@ -1,77 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { contextFactory } from '../browserContextFactory.js';
import { BrowserServerBackend } from '../browserServerBackend.js';
import { Context as BrowserContext } from '../context.js';
import { runTask } from '../loop/loop.js';
import { OpenAIDelegate } from '../loop/loopOpenAI.js';
import { ClaudeDelegate } from '../loop/loopClaude.js';
import { InProcessTransport } from '../mcp/inProcessTransport.js';
import * as mcpServer from '../mcp/server.js';
import type { LLMDelegate } from '../loop/loop.js';
import type { FullConfig } from '../config.js';
/**
 * Execution context for the loop tools: an in-process MCP client connected to
 * a browser backend, plus the LLM delegate that drives it.
 */
export class Context {
  readonly config: FullConfig;
  private _client: Client;
  private _delegate: LLMDelegate;

  /**
   * Picks the LLM provider from the environment: OpenAI when `OPENAI_API_KEY`
   * is set, otherwise Claude when `ANTHROPIC_API_KEY` is set. Throws when
   * neither key is present.
   */
  constructor(config: FullConfig, client: Client) {
    this.config = config;
    this._client = client;
    const { OPENAI_API_KEY: openAIKey, ANTHROPIC_API_KEY: anthropicKey } = process.env;
    if (!openAIKey && !anthropicKey)
      throw new Error('No LLM API key found. Please set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable.');
    this._delegate = openAIKey ? new OpenAIDelegate() : new ClaudeDelegate();
  }

  /**
   * Creates a browser-backed MCP server, connects a client to it over an
   * in-process transport, pings it, and wraps the client in a `Context`.
   */
  static async create(config: FullConfig) {
    const client = new Client({ name: 'Playwright Proxy', version: '1.0.0' });
    const backend = new BrowserServerBackend(config, contextFactory(config.browser));
    const transport = new InProcessTransport(mcpServer.createServer(backend));
    await client.connect(transport);
    await client.ping();
    return new Context(config, client);
  }

  /**
   * Runs `task` through the LLM loop and renders the resulting messages as a
   * single text response. The initial user message and blank messages are
   * left out; unless `oneShot` is set, each message is cut at its first
   * `### Page state` marker.
   */
  async runTask(task: string, oneShot: boolean = false): Promise<mcpServer.ToolResponse> {
    const history = await runTask(this._delegate, this._client, task, oneShot);
    const lines: string[] = [];

    // The first entry is the user's own task, so start after it.
    for (const { role, content } of history.slice(1)) {
      if (!content.trim())
        continue;
      // Drop the page snapshot, except in one-shot mode.
      const cutAt = oneShot ? -1 : content.indexOf('### Page state');
      lines.push(`[${role}]:`, cutAt === -1 ? content : content.substring(0, cutAt));
    }

    const text = lines.join('\n');
    return {
      content: [{ type: 'text', text }],
    };
  }

  /** Disposes all browser contexts. */
  async close() {
    await BrowserContext.disposeAll();
  }
}

View File

@ -1,63 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import dotenv from 'dotenv';
import * as mcpServer from '../mcp/server.js';
import * as mcpTransport from '../mcp/transport.js';
import { packageJSON } from '../package.js';
import { Context } from './context.js';
import { perform } from './perform.js';
import { snapshot } from './snapshot.js';
import type { FullConfig } from '../config.js';
import type { ServerBackend } from '../mcp/server.js';
import type { Tool } from './tool.js';
/**
 * Entry point for the loop-tools server: loads environment variables through
 * dotenv, then hands `mcpTransport.start` a factory that creates a
 * `LoopToolsServerBackend` for `config`, together with `config.server`.
 */
export async function runLoopTools(config: FullConfig) {
dotenv.config();
const serverBackendFactory = () => new LoopToolsServerBackend(config);
await mcpTransport.start(serverBackendFactory, config.server);
}
/**
 * Server backend exposing the `perform` and `snapshot` loop tools. The
 * `Context` that the tools run against is created in `initialize()`.
 */
class LoopToolsServerBackend implements ServerBackend {
  readonly name = 'Playwright';
  readonly version = packageJSON.version;
  private _config: FullConfig;
  private _context: Context | undefined;
  private _tools: Tool<any>[] = [perform, snapshot];

  constructor(config: FullConfig) {
    this._config = config;
  }

  /** Creates the context used by subsequent tool calls. */
  async initialize() {
    this._context = await Context.create(this._config);
  }

  /** Returns the schemas of all tools offered by this backend. */
  tools(): mcpServer.ToolSchema<any>[] {
    return this._tools.map(tool => tool.schema);
  }

  /**
   * Dispatches a call to the tool whose schema name matches `schema.name`.
   *
   * @throws Error when no such tool exists, or when `initialize()` has not
   *   been called yet. Both cases previously surfaced as an opaque TypeError.
   */
  async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any): Promise<mcpServer.ToolResponse> {
    const tool = this._tools.find(tool => tool.schema.name === schema.name);
    if (!tool)
      throw new Error(`Unknown tool "${schema.name}"`);
    if (!this._context)
      throw new Error('Server backend is not initialized');
    return await tool.handle(this._context, parsedArguments);
  }

  /**
   * Releases the context when the server closes. This is a no-op if the
   * server closes before `initialize()` has created one.
   */
  serverClosed() {
    void this._context?.close();
  }
}

View File

@ -1,36 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
// Input of the browser_perform tool: a single free-form task description.
const performSchema = z.object({
task: z.string().describe('The task to perform with the browser'),
});
/**
 * The `browser_perform` tool. Its handler passes the supplied task string to
 * `context.runTask` and returns whatever that call produces.
 */
export const perform = defineTool({
schema: {
name: 'browser_perform',
title: 'Perform a task with the browser',
description: 'Perform a task with the browser. It can click, type, export, capture screenshot, drag, hover, select options, etc.',
inputSchema: performSchema,
type: 'destructive',
},
handle: async (context, params) => {
return await context.runTask(params.task);
},
});

View File

@ -1,32 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
/**
 * `browser_snapshot` tool: takes no input and asks the context to run the
 * fixed task 'Capture browser snapshot'.
 */
export const snapshot = defineTool({
  schema: {
    name: 'browser_snapshot',
    title: 'Take a snapshot of the browser',
    description: 'Take a snapshot of the browser to read what is on the page.',
    inputSchema: z.object({}),
    type: 'readOnly',
  },

  // The tool has no parameters, so only the context is consumed.
  // NOTE(review): the meaning of the second `true` argument is defined by
  // Context.runTask, which is not visible here - confirm before changing.
  handle: async context => {
    const result = await context.runTask('Capture browser snapshot', true);
    return result;
  },
});

View File

@ -1,29 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { z } from 'zod';
import type * as mcpServer from '../mcp/server.js';
import type { Context } from './context.js';
/**
 * A loop tool: the schema advertised to MCP clients plus the handler that
 * executes it.
 */
export type Tool<Input extends z.Schema = z.Schema> = {
  /** Name, title, description, input schema and read-only/destructive kind. */
  schema: mcpServer.ToolSchema<Input>;
  /** Runs the tool against `context` with arguments typed by the input schema. */
  handle: (context: Context, params: z.output<Input>) => Promise<mcpServer.ToolResponse>;
};

/**
 * Identity helper used when declaring a tool: it returns its argument
 * unchanged and exists so TypeScript infers `Input` from `schema.inputSchema`
 * and types the handler's `params` accordingly.
 *
 * @param tool The tool definition.
 * @returns The very same object that was passed in.
 */
export function defineTool<Input extends z.Schema>(tool: Tool<Input>): Tool<Input> {
  return tool;
}

View File

@ -1 +0,0 @@
- Generic MCP utils, no dependencies on Playwright here.

View File

@ -1,92 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
import type { Transport, TransportSendOptions } from '@modelcontextprotocol/sdk/shared/transport.js';
import type { JSONRPCMessage, MessageExtraInfo } from '@modelcontextprotocol/sdk/types.js';
/**
 * Client-side half of an in-process transport pair.
 *
 * Messages sent through this transport are handed directly to the paired
 * InProcessServerTransport, and messages the server sends come back through
 * `_receiveFromServer`, so client and server talk without any I/O channel.
 */
export class InProcessTransport implements Transport {
  private _server: Server;
  private _serverTransport: InProcessServerTransport;
  private _connected: boolean = false;

  constructor(server: Server) {
    this._server = server;
    this._serverTransport = new InProcessServerTransport(this);
  }

  /**
   * Connects the server to the paired server-side transport.
   *
   * @throws Error if the transport is already connected.
   */
  async start(): Promise<void> {
    if (this._connected)
      throw new Error('InprocessTransport already started!');

    await this._server.connect(this._serverTransport);
    // Only flagged as connected once the server accepted the transport.
    this._connected = true;
  }

  /**
   * Delivers a client message to the server side.
   *
   * @throws Error if start() has not completed (or close() was called).
   */
  async send(message: JSONRPCMessage, options?: TransportSendOptions): Promise<void> {
    if (!this._connected)
      throw new Error('Transport not connected');

    this._serverTransport._receiveFromClient(message);
  }

  /** Marks the pair as closed and fires both `onclose` callbacks; no-op if not connected. */
  async close(): Promise<void> {
    if (!this._connected)
      return;

    this._connected = false;
    this.onclose?.();
    this._serverTransport.onclose?.();
  }

  onclose?: (() => void) | undefined;
  onerror?: ((error: Error) => void) | undefined;
  onmessage?: ((message: JSONRPCMessage, extra?: MessageExtraInfo) => void) | undefined;
  sessionId?: string | undefined;
  setProtocolVersion?: ((version: string) => void) | undefined;

  /** Called by the server-side transport to push a message to the client's `onmessage`. */
  _receiveFromServer(message: JSONRPCMessage, extra?: MessageExtraInfo): void {
    const listener = this.onmessage;
    if (listener)
      this.onmessage?.(message, extra);
  }
}
/**
 * Server-side half of the in-process transport pair. Outgoing messages are
 * forwarded to the client transport; incoming ones are surfaced through
 * `onmessage`.
 */
class InProcessServerTransport implements Transport {
  private _clientTransport: InProcessTransport;

  onclose?: (() => void) | undefined;
  onerror?: ((error: Error) => void) | undefined;
  onmessage?: ((message: JSONRPCMessage, extra?: MessageExtraInfo) => void) | undefined;
  sessionId?: string | undefined;
  setProtocolVersion?: ((version: string) => void) | undefined;

  constructor(clientTransport: InProcessTransport) {
    this._clientTransport = clientTransport;
  }

  /** Nothing to set up: the pair is wired together at construction time. */
  async start(): Promise<void> {
    return;
  }

  /** Hands a server message straight to the client transport. */
  async send(message: JSONRPCMessage, options?: TransportSendOptions): Promise<void> {
    const client = this._clientTransport;
    client._receiveFromServer(message);
  }

  /** Fires this transport's own `onclose` callback, if one is set. */
  async close(): Promise<void> {
    if (this.onclose)
      this.onclose();
  }

  /** Called by the client transport to deliver a message to `onmessage`. */
  _receiveFromClient(message: JSONRPCMessage): void {
    if (this.onmessage)
      this.onmessage(message);
  }
}

View File

@ -1,105 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { ImageContent, Implementation, TextContent } from '@modelcontextprotocol/sdk/types.js';
import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
/** Alias of the SDK's `Implementation` type; used for the client info passed to `serverInitialized`. */
export type ClientVersion = Implementation;
/** Result of a tool call: text and/or image content items plus an optional error flag. */
export type ToolResponse = {
content: (TextContent | ImageContent)[];
isError?: boolean;
};
/**
 * Static description of a tool. `createServer` publishes `name`, `description`
 * and the JSON-schema form of `inputSchema` in the tool listing, puts `title`
 * into the annotations, and derives the read-only/destructive hints from `type`.
 */
export type ToolSchema<Input extends z.Schema> = {
name: string;
title: string;
description: string;
inputSchema: Input;
type: 'readOnly' | 'destructive';
};
/** Signature of a function that runs a tool identified by name with untyped params. */
export type ToolHandler = (toolName: string, params: any) => Promise<ToolResponse>;
/**
 * Contract between the generic MCP server wiring and a concrete tool provider.
 *
 * - `initialize`, when present, is awaited by `connect` before the server is created.
 * - `tools` is read once by `createServer` to build the tool listing.
 * - `callTool` receives the matched schema and the arguments already parsed
 *   with that schema's `inputSchema`.
 * - `serverInitialized` / `serverClosed`, when present, are hooked to the
 *   server's `oninitialized` / `onclose` callbacks.
 */
export interface ServerBackend {
name: string;
version: string;
initialize?(): Promise<void>;
tools(): ToolSchema<any>[];
callTool(schema: ToolSchema<any>, parsedArguments: any): Promise<ToolResponse>;
serverInitialized?(version: ClientVersion | undefined): void;
serverClosed?(): void;
}
/** Produces a backend instance; `connect` invokes it once per call. */
export type ServerBackendFactory = () => ServerBackend;
/**
 * Builds a backend from the factory, lets it initialize (when it defines
 * `initialize`), wraps it in an MCP server and connects that server to the
 * given transport.
 *
 * @param serverBackendFactory Factory invoked exactly once to obtain the backend.
 * @param transport Transport the newly created server is connected to.
 */
export async function connect(serverBackendFactory: ServerBackendFactory, transport: Transport) {
  const backend = serverBackendFactory();
  await backend.initialize?.();
  await createServer(backend).connect(transport);
}
/**
 * Creates an MCP `Server` that serves the tools of the given backend.
 *
 * The tool list is read from the backend once, at creation time. Tool calls
 * never reject: an unknown tool name, an argument validation failure or an
 * exception thrown by the backend is reported as a response with
 * `isError: true` and the error text as its only content item.
 *
 * @param backend Provider of the tool schemas and the tool implementation.
 * @returns The configured, not yet connected server.
 */
export function createServer(backend: ServerBackend): Server {
  const serverInfo = { name: backend.name, version: backend.version };
  const server = new Server(serverInfo, { capabilities: { tools: {} } });
  const tools = backend.tools();

  // Builds the error-shaped tool response; multiple messages are joined by newlines.
  const errorResult = (...messages: string[]) => ({
    content: [{ type: 'text', text: messages.join('\n') }],
    isError: true,
  });

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    const listing = tools.map(({ name, title, description, inputSchema, type }) => ({
      name,
      description,
      inputSchema: zodToJsonSchema(inputSchema),
      annotations: {
        title,
        readOnlyHint: type === 'readOnly',
        destructiveHint: type === 'destructive',
        openWorldHint: true,
      },
    }));
    return { tools: listing };
  });

  server.setRequestHandler(CallToolRequestSchema, async request => {
    const requestedName = request.params.name;
    const tool = tools.find(candidate => candidate.name === requestedName) as ToolSchema<any>;
    if (!tool)
      return errorResult(`Tool "${requestedName}" not found`);

    try {
      // Missing arguments are validated as an empty object.
      const parsedArguments = tool.inputSchema.parse(request.params.arguments || {});
      return await backend.callTool(tool, parsedArguments);
    } catch (error) {
      return errorResult(String(error));
    }
  });

  // Lifecycle hooks are only installed when the backend implements them.
  if (backend.serverInitialized)
    server.oninitialized = () => backend.serverInitialized!(server.getClientVersion());
  if (backend.serverClosed)
    server.onclose = () => backend.serverClosed!();

  return server;
}

View File

@ -18,14 +18,11 @@ import { program, Option } from 'commander';
// @ts-ignore
import { startTraceViewerServer } from 'playwright-core/lib/server';
import * as mcpTransport from './mcp/transport.js';
import { startHttpServer, startHttpTransport, startStdioTransport } from './transport.js';
import { commaSeparatedList, resolveCLIConfig, semicolonSeparatedList } from './config.js';
import { Server } from './server.js';
import { packageJSON } from './package.js';
import { runWithExtension } from './extension/main.js';
import { BrowserServerBackend } from './browserServerBackend.js';
import { Context } from './context.js';
import { contextFactory } from './browserContextFactory.js';
import { runLoopTools } from './loopTools/main.js';
program
.version('Version ' + packageJSON.version)
@ -56,9 +53,12 @@ program
.option('--user-data-dir <path>', 'path to the user data directory. If not specified, a temporary directory will be created.')
.option('--viewport-size <size>', 'specify browser viewport size in pixels, for example "1280, 720"')
.addOption(new Option('--extension', 'Connect to a running browser instance (Edge/Chrome only). Requires the "Playwright MCP Bridge" browser extension to be installed.').hideHelp())
.addOption(new Option('--loop-tools', 'Run loop tools').hideHelp())
.addOption(new Option('--vision', 'Legacy option, use --caps=vision instead').hideHelp())
.action(async options => {
if (options.extension) {
await runWithExtension(options);
return;
}
if (options.vision) {
// eslint-disable-next-line no-console
@ -66,20 +66,16 @@ program
options.caps = 'vision';
}
const config = await resolveCLIConfig(options);
const abortController = setupExitWatchdog(config.server);
if (options.extension) {
await runWithExtension(config, abortController);
return;
}
if (options.loopTools) {
await runLoopTools(config);
return;
}
const server = new Server(config);
server.setupExitWatchdog();
const browserContextFactory = contextFactory(config.browser);
const serverBackendFactory = () => new BrowserServerBackend(config, browserContextFactory);
await mcpTransport.start(serverBackendFactory, config.server);
if (config.server.port !== undefined) {
const httpServer = await startHttpServer(config.server);
startHttpTransport(httpServer, server);
} else {
await startStdioTransport(server);
}
if (config.saveTrace) {
const server = await startTraceViewerServer();
@ -90,27 +86,4 @@ program
}
});
/**
 * Installs process-exit handling and returns an AbortController that is
 * aborted (reason 'Process exiting') when shutdown begins.
 *
 * On SIGINT, SIGTERM, or - only when `serverConfig.port` is set - stdin
 * closing, the handler runs once: it arms a 15 second hard-exit timer, aborts
 * the controller, awaits `Context.disposeAll()` and exits with code 0.
 *
 * NOTE(review): stdin 'close' is observed only when a port IS configured;
 * confirm this is intended rather than the opposite (stdio mode) condition.
 *
 * @param serverConfig Server options; only `port` is consulted.
 * @returns The controller whose signal fires at the start of shutdown.
 */
function setupExitWatchdog(serverConfig: { host?: string; port?: number }) {
  const abortController = new AbortController();
  let isExiting = false;

  const handleExit = async () => {
    // Several triggers may fire; only the first one performs the shutdown.
    if (isExiting)
      return;
    isExiting = true;

    // Hard deadline in case disposal hangs.
    setTimeout(() => process.exit(0), 15000);
    abortController.abort('Process exiting');
    await Context.disposeAll();
    process.exit(0);
  };

  const listensOnPort = serverConfig.port !== undefined;
  if (listensOnPort)
    process.stdin.on('close', handleExit);

  for (const signal of ['SIGINT', 'SIGTERM'] as const)
    process.on(signal, handleExit);

  return abortController;
}
void program.parseAsync(process.argv);

59
src/server.ts Normal file
View File

@ -0,0 +1,59 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { createConnection } from './connection.js';
import { contextFactory as defaultContextFactory } from './browserContextFactory.js';
import type { FullConfig } from './config.js';
import type { Connection } from './connection.js';
import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
/**
 * Owns the configuration and browser context factory shared by all MCP
 * connections, and keeps track of the connections it has created so they can
 * be closed on process exit.
 */
export class Server {
  readonly config: FullConfig;
  private _connectionList: Connection[] = [];
  private _browserConfig: FullConfig['browser'];
  private _contextFactory: BrowserContextFactory;

  /**
   * @param config Full server configuration.
   * @param contextFactory Optional factory override; when omitted the default
   *   factory is built from `config.browser`.
   */
  constructor(config: FullConfig, contextFactory?: BrowserContextFactory) {
    this.config = config;
    this._browserConfig = config.browser;
    this._contextFactory = contextFactory ?? defaultContextFactory(this._browserConfig);
  }

  /**
   * Creates a connection, records it, and connects its MCP server to the
   * given transport.
   *
   * @returns The new connection once the transport is connected.
   */
  async createConnection(transport: Transport): Promise<Connection> {
    const created = await createConnection(this.config, this._contextFactory);
    // Recorded before connecting so the exit watchdog also closes it.
    this._connectionList.push(created);
    await created.server.connect(transport);
    return created;
  }

  /**
   * Registers handlers for stdin close, SIGINT and SIGTERM that close every
   * recorded connection and then exit with code 0. A 15 second timer forces
   * the exit if closing does not finish in time.
   */
  setupExitWatchdog() {
    let isExiting = false;

    const handleExit = async () => {
      // Only the first trigger performs the shutdown.
      if (isExiting)
        return;
      isExiting = true;

      setTimeout(() => process.exit(0), 15000);
      const pendingCloses = this._connectionList.map(connection => connection.close());
      await Promise.all(pendingCloses);
      process.exit(0);
    };

    process.stdin.on('close', handleExit);
    for (const signal of ['SIGINT', 'SIGTERM'] as const)
      process.on(signal, handleExit);
  }
}

View File

@ -15,7 +15,6 @@
*/
import common from './tools/common.js';
import configure from './tools/configure.js';
import console from './tools/console.js';
import dialogs from './tools/dialogs.js';
import evaluate from './tools/evaluate.js';
@ -33,11 +32,9 @@ import wait from './tools/wait.js';
import mouse from './tools/mouse.js';
import type { Tool } from './tools/tool.js';
import type { FullConfig } from './config.js';
export const allTools: Tool<any>[] = [
...common,
...configure,
...console,
...dialogs,
...evaluate,
@ -54,7 +51,3 @@ export const allTools: Tool<any>[] = [
...video,
...wait,
];
/**
 * Selects the tools enabled for the given configuration: every tool whose
 * capability starts with 'core', plus tools whose capability is listed in
 * `config.capabilities`.
 *
 * @param config Configuration whose optional `capabilities` list is consulted.
 * @returns The subset of `allTools` that passes the check, in original order.
 */
export function filteredTools(config: FullConfig) {
  return allTools.filter(({ capability }) => {
    if (capability.startsWith('core'))
      return true;
    return config.capabilities?.includes(capability);
  });
}

View File

@ -29,7 +29,7 @@ const close = defineTool({
},
handle: async (context, params, response) => {
await context.closeBrowserContext();
await context.close();
response.setIncludeTabs();
response.addCode(`await page.close()`);
},

View File

@ -1,87 +0,0 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import type { Context } from '../context.js';
import type { Response } from '../response.js';
/** Input of `browser_configure`; every field is optional and only provided fields are considered. */
const configureSchema = z.object({
  headless: z.boolean().optional().describe('Whether to run the browser in headless mode'),
  viewport: z.object({
    width: z.number(),
    height: z.number(),
  }).optional().describe('Browser viewport size'),
  userAgent: z.string().optional().describe('User agent string for the browser'),
});

export default [
  /**
   * `browser_configure` tool.
   *
   * Compares the requested settings with the current browser configuration,
   * reports "no changes" when nothing differs, and otherwise asks the context
   * to apply the new settings and lists what changed as `old -> new` lines.
   *
   * Any failure is rethrown wrapped in an Error prefixed with
   * 'Failed to update browser configuration'.
   */
  defineTool({
    capability: 'core',

    schema: {
      name: 'browser_configure',
      title: 'Configure browser settings',
      description: 'Change browser configuration settings like headless/headed mode, viewport size, or user agent for subsequent operations. This will close the current browser and restart it with new settings.',
      inputSchema: configureSchema,
      type: 'destructive',
    },

    handle: async (context: Context, params: z.output<typeof configureSchema>, response: Response) => {
      try {
        const currentConfig = context.config;
        // Human-readable "setting: old -> new" entries, one per differing setting.
        const changes: string[] = [];

        if (params.headless !== undefined) {
          const currentHeadless = currentConfig.browser.launchOptions.headless;
          if (params.headless !== currentHeadless)
            changes.push(`headless: ${currentHeadless} -> ${params.headless}`);
        }

        if (params.viewport) {
          const currentViewport = currentConfig.browser.contextOptions.viewport;
          const sameViewport = !!currentViewport
            && currentViewport.width === params.viewport.width
            && currentViewport.height === params.viewport.height;
          if (!sameViewport) {
            const before = `${currentViewport?.width || 'default'}x${currentViewport?.height || 'default'}`;
            const after = `${params.viewport.width}x${params.viewport.height}`;
            changes.push(`viewport: ${before} -> ${after}`);
          }
        }

        if (params.userAgent) {
          const currentUA = currentConfig.browser.contextOptions.userAgent;
          if (params.userAgent !== currentUA)
            changes.push(`userAgent: ${currentUA || 'default'} -> ${params.userAgent}`);
        }

        if (changes.length === 0) {
          response.addResult('No configuration changes detected. Current settings remain the same.');
          return;
        }

        // Apply the configuration changes.
        await context.updateBrowserConfig({
          headless: params.headless,
          viewport: params.viewport,
          userAgent: params.userAgent,
        });

        const changeList = changes.map(change => `- ${change}`).join('\n');
        response.addResult(`Browser configuration updated successfully:\n${changeList}\n\nThe browser has been restarted with the new settings.`);
      } catch (error) {
        throw new Error(`Failed to update browser configuration: ${error}`);
      }
    },
  }),
];

View File

@ -20,7 +20,16 @@ import type * as playwright from 'playwright';
import type { ToolCapability } from '../../config.js';
import type { Tab } from '../tab.js';
import type { Response } from '../response.js';
import type { ToolSchema } from '../mcp/server.js';
/**
 * Static description of a tool: its identifying `name`, a human-readable
 * `title` and `description`, the schema its input is typed by, and whether it
 * is classified as 'readOnly' or 'destructive'.
 */
export type ToolSchema<Input extends InputType> = {
name: string;
title: string;
description: string;
inputSchema: Input;
type: 'readOnly' | 'destructive';
};
/** Constraint for tool input schemas: any zod schema. */
type InputType = z.Schema;
export type FileUploadModalState = {
type: 'fileChooser';
@ -36,24 +45,44 @@ export type DialogModalState = {
export type ModalState = FileUploadModalState | DialogModalState;
export type Tool<Input extends z.Schema = z.Schema> = {
/** Content item tagged 'snapshot' carrying the snapshot as a string. */
export type SnapshotContent = {
type: 'snapshot';
snapshot: string;
};
/** Content item tagged 'text' carrying a text string. */
export type TextContent = {
type: 'text';
text: string;
};
/** Content item tagged 'image' carrying the image as a string. */
export type ImageContent = {
type: 'image';
image: string;
};
/** Content item tagged 'code' carrying a list of code lines/snippets as strings. */
export type CodeContent = {
type: 'code';
code: string[];
};
export type Tool<Input extends InputType = InputType> = {
capability: ToolCapability;
schema: ToolSchema<Input>;
handle: (context: Context, params: z.output<Input>, response: Response) => Promise<void>;
};
export function defineTool<Input extends z.Schema>(tool: Tool<Input>): Tool<Input> {
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
return tool;
}
export type TabTool<Input extends z.Schema = z.Schema> = {
export type TabTool<Input extends InputType = InputType> = {
capability: ToolCapability;
schema: ToolSchema<Input>;
clearsModalState?: ModalState['type'];
handle: (tab: Tab, params: z.output<Input>, response: Response) => Promise<void>;
};
export function defineTabTool<Input extends z.Schema>(tool: TabTool<Input>): Tool<Input> {
export function defineTabTool<Input extends InputType>(tool: TabTool<Input>): Tool<Input> {
return {
...tool,
handle: async (context, params, response) => {

View File

@ -14,34 +14,28 @@
* limitations under the License.
*/
import http from 'http';
import crypto from 'crypto';
import debug from 'debug';
import http from 'node:http';
import assert from 'node:assert';
import crypto from 'node:crypto';
import debug from 'debug';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { httpAddressToString, startHttpServer } from '../httpServer.js';
import * as mcpServer from './server.js';
import type { ServerBackendFactory } from './server.js';
import { logUnhandledError } from './log.js';
/**
 * Starts serving MCP over the transport implied by the options: HTTP when a
 * port is given, stdio otherwise.
 *
 * @param serverBackendFactory Factory handed to the chosen transport.
 * @param options Listen options; `port` selects the transport and the whole
 *   object is passed to the HTTP server when one is started.
 */
export async function start(serverBackendFactory: ServerBackendFactory, options: { host?: string; port?: number }) {
  if (options.port === undefined) {
    await startStdioTransport(serverBackendFactory);
    return;
  }

  const httpServer = await startHttpServer(options);
  startHttpTransport(httpServer, serverBackendFactory);
}
import type { AddressInfo } from 'node:net';
import type { Server } from './server.js';
import type { Connection } from './connection.js';
async function startStdioTransport(serverBackendFactory: ServerBackendFactory) {
await mcpServer.connect(serverBackendFactory, new StdioServerTransport());
export async function startStdioTransport(server: Server) {
await server.createConnection(new StdioServerTransport());
}
const testDebug = debug('pw:mcp:test');
async function handleSSE(serverBackendFactory: ServerBackendFactory, req: http.IncomingMessage, res: http.ServerResponse, url: URL, sessions: Map<string, SSEServerTransport>) {
async function handleSSE(server: Server, req: http.IncomingMessage, res: http.ServerResponse, url: URL, sessions: Map<string, SSEServerTransport>) {
if (req.method === 'POST') {
const sessionId = url.searchParams.get('sessionId');
if (!sessionId) {
@ -60,10 +54,11 @@ async function handleSSE(serverBackendFactory: ServerBackendFactory, req: http.I
const transport = new SSEServerTransport('/sse', res);
sessions.set(transport.sessionId, transport);
testDebug(`create SSE session: ${transport.sessionId}`);
await mcpServer.connect(serverBackendFactory, transport);
const connection = await server.createConnection(transport);
res.on('close', () => {
testDebug(`delete SSE session: ${transport.sessionId}`);
sessions.delete(transport.sessionId);
void connection.close().catch(logUnhandledError);
});
return;
}
@ -72,10 +67,10 @@ async function handleSSE(serverBackendFactory: ServerBackendFactory, req: http.I
res.end('Method not allowed');
}
async function handleStreamable(serverBackendFactory: ServerBackendFactory, req: http.IncomingMessage, res: http.ServerResponse, sessions: Map<string, StreamableHTTPServerTransport>) {
async function handleStreamable(server: Server, req: http.IncomingMessage, res: http.ServerResponse, sessions: Map<string, { transport: StreamableHTTPServerTransport, connection: Connection }>) {
const sessionId = req.headers['mcp-session-id'] as string | undefined;
if (sessionId) {
const transport = sessions.get(sessionId);
const { transport } = sessions.get(sessionId) ?? {};
if (!transport) {
res.statusCode = 404;
res.end('Session not found');
@ -89,16 +84,18 @@ async function handleStreamable(serverBackendFactory: ServerBackendFactory, req:
sessionIdGenerator: () => crypto.randomUUID(),
onsessioninitialized: async sessionId => {
testDebug(`create http session: ${transport.sessionId}`);
await mcpServer.connect(serverBackendFactory, transport);
sessions.set(sessionId, transport);
const connection = await server.createConnection(transport);
sessions.set(sessionId, { transport, connection });
}
});
transport.onclose = () => {
if (!transport.sessionId)
const result = transport.sessionId ? sessions.get(transport.sessionId) : undefined;
if (!result)
return;
sessions.delete(transport.sessionId);
sessions.delete(result.transport.sessionId!);
testDebug(`delete http session: ${transport.sessionId}`);
result.connection.close().catch(logUnhandledError);
};
await transport.handleRequest(req, res);
@ -109,15 +106,28 @@ async function handleStreamable(serverBackendFactory: ServerBackendFactory, req:
res.end('Invalid request');
}
function startHttpTransport(httpServer: http.Server, serverBackendFactory: ServerBackendFactory) {
const sseSessions = new Map();
/**
 * Creates an HTTP server and waits until it is listening.
 *
 * @param config Host and port forwarded to `listen`.
 * @returns The listening server.
 * @throws Rejects with the server's 'error' event payload if one is emitted
 *   before the server starts listening.
 */
export async function startHttpServer(config: { host?: string, port?: number }): Promise<http.Server> {
  const { host, port } = config;
  const httpServer = http.createServer();

  const listening = new Promise<void>((resolve, reject) => {
    const onListening = () => {
      resolve();
      // The startup error hook is no longer needed once listening succeeded.
      httpServer.removeListener('error', reject);
    };
    httpServer.on('error', reject);
    httpServer.listen(port, host, onListening);
  });
  await listening;

  return httpServer;
}
export function startHttpTransport(httpServer: http.Server, mcpServer: Server) {
const sseSessions = new Map<string, SSEServerTransport>();
const streamableSessions = new Map();
httpServer.on('request', async (req, res) => {
const url = new URL(`http://localhost${req.url}`);
if (url.pathname.startsWith('/sse'))
await handleSSE(serverBackendFactory, req, res, url, sseSessions);
await handleSSE(mcpServer, req, res, url, sseSessions);
else
await handleStreamable(serverBackendFactory, req, res, streamableSessions);
await handleStreamable(mcpServer, req, res, streamableSessions);
});
const url = httpAddressToString(httpServer.address());
const message = [
@ -135,3 +145,14 @@ function startHttpTransport(httpServer: http.Server, serverBackendFactory: Serve
// eslint-disable-next-line no-console
console.error(message);
}
/**
 * Formats the result of `server.address()` as a URL-like string.
 *
 * String addresses are returned unchanged. For socket addresses the host is
 * bracketed unless the family is 'IPv4', the wildcard hosts `0.0.0.0` and
 * `[::]` are shown as `localhost`, and the result is `http://host:port`.
 *
 * @param address Value returned by `server.address()`.
 * @returns The printable address.
 * @throws AssertionError ('Could not bind server socket') when `address` is
 *   null or an empty string.
 */
export function httpAddressToString(address: string | AddressInfo | null): string {
  assert(address, 'Could not bind server socket');
  if (typeof address === 'string')
    return address;

  const { address: rawHost, family, port } = address;
  const formattedHost = family === 'IPv4' ? rawHost : `[${rawHost}]`;
  const isWildcard = formattedHost === '0.0.0.0' || formattedHost === '[::]';
  return `http://${isWildcard ? 'localhost' : formattedHost}:${port}`;
}