chore: document mcp tools (#35258)
This commit is contained in:
parent
0a3387fda3
commit
23b5b05f67
|
@ -1,4 +1,4 @@
|
||||||
### Playwright MCP
|
## Playwright MCP
|
||||||
|
|
||||||
This package is experimental and not yet ready for production use.
|
This package is experimental and not yet ready for production use.
|
||||||
It is subject to change and will not respect semver versioning.
|
It is subject to change and will not respect semver versioning.
|
||||||
|
@ -62,3 +62,162 @@ And then in MCP config, add following to the `env`:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Tool Modes
|
||||||
|
|
||||||
|
The tools are available in two modes:
|
||||||
|
|
||||||
|
1. **Snapshot Mode** (default): Uses accessibility snapshots for better performance and reliability
|
||||||
|
2. **Vision Mode**: Uses screenshots for visual-based interactions
|
||||||
|
|
||||||
|
To use Vision Mode, add the `--vision` flag when starting the server:
|
||||||
|
|
||||||
|
```js
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"playwright": {
|
||||||
|
"command": "npx",
|
||||||
|
"args": [
|
||||||
|
"@playwright/mcp",
|
||||||
|
"--vision"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Vision Mode works best with computer-use models that are able to interact with elements using
|
||||||
|
an X/Y coordinate space, based on the provided screenshot.
|
||||||
|
|
||||||
|
### Snapshot Mode
|
||||||
|
|
||||||
|
The Playwright MCP provides a set of tools for browser automation. Here are all available tools:
|
||||||
|
|
||||||
|
- **browser_navigate**
|
||||||
|
- Description: Navigate to a URL
|
||||||
|
- Parameters:
|
||||||
|
- `url` (string): The URL to navigate to
|
||||||
|
|
||||||
|
- **browser_go_back**
|
||||||
|
- Description: Go back to the previous page
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_go_forward**
|
||||||
|
- Description: Go forward to the next page
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_click**
|
||||||
|
- Description: Perform click on a web page
|
||||||
|
- Parameters:
|
||||||
|
- `element` (string): Human-readable element description used to obtain the permission to interact with the element
|
||||||
|
- `ref` (string): Exact target element reference from the page snapshot
|
||||||
|
|
||||||
|
- **browser_hover**
|
||||||
|
- Description: Hover over element on page
|
||||||
|
- Parameters:
|
||||||
|
- `element` (string): Human-readable element description used to obtain the permission to interact with the element
|
||||||
|
- `ref` (string): Exact target element reference from the page snapshot
|
||||||
|
|
||||||
|
- **browser_drag**
|
||||||
|
- Description: Perform drag and drop between two elements
|
||||||
|
- Parameters:
|
||||||
|
- `startElement` (string): Human-readable source element description used to obtain the permission to interact with the element
|
||||||
|
- `startRef` (string): Exact source element reference from the page snapshot
|
||||||
|
- `endElement` (string): Human-readable target element description used to obtain the permission to interact with the element
|
||||||
|
- `endRef` (string): Exact target element reference from the page snapshot
|
||||||
|
|
||||||
|
- **browser_type**
|
||||||
|
- Description: Type text into editable element
|
||||||
|
- Parameters:
|
||||||
|
- `element` (string): Human-readable element description used to obtain the permission to interact with the element
|
||||||
|
- `ref` (string): Exact target element reference from the page snapshot
|
||||||
|
- `text` (string): Text to type into the element
|
||||||
|
- `submit` (boolean): Whether to submit entered text (press Enter after)
|
||||||
|
|
||||||
|
- **browser_press_key**
|
||||||
|
- Description: Press a key on the keyboard
|
||||||
|
- Parameters:
|
||||||
|
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||||
|
|
||||||
|
- **browser_snapshot**
|
||||||
|
- Description: Capture accessibility snapshot of the current page (better than screenshot)
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_save_as_pdf**
|
||||||
|
- Description: Save page as PDF
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_wait**
|
||||||
|
- Description: Wait for a specified time in seconds
|
||||||
|
- Parameters:
|
||||||
|
- `time` (number): The time to wait in seconds (capped at 10 seconds)
|
||||||
|
|
||||||
|
- **browser_close**
|
||||||
|
- Description: Close the page
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
|
||||||
|
### Vision Mode
|
||||||
|
|
||||||
|
Vision Mode provides tools for visual-based interactions using screenshots. Here are all available tools:
|
||||||
|
|
||||||
|
- **browser_navigate**
|
||||||
|
- Description: Navigate to a URL
|
||||||
|
- Parameters:
|
||||||
|
- `url` (string): The URL to navigate to
|
||||||
|
|
||||||
|
- **browser_go_back**
|
||||||
|
- Description: Go back to the previous page
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_go_forward**
|
||||||
|
- Description: Go forward to the next page
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_screenshot**
|
||||||
|
- Description: Capture screenshot of the current page
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_move_mouse**
|
||||||
|
- Description: Move mouse to specified coordinates
|
||||||
|
- Parameters:
|
||||||
|
- `x` (number): X coordinate
|
||||||
|
- `y` (number): Y coordinate
|
||||||
|
|
||||||
|
- **browser_click**
|
||||||
|
- Description: Click at specified coordinates
|
||||||
|
- Parameters:
|
||||||
|
- `x` (number): X coordinate to click at
|
||||||
|
- `y` (number): Y coordinate to click at
|
||||||
|
|
||||||
|
- **browser_drag**
|
||||||
|
- Description: Perform drag and drop operation
|
||||||
|
- Parameters:
|
||||||
|
- `startX` (number): Start X coordinate
|
||||||
|
- `startY` (number): Start Y coordinate
|
||||||
|
- `endX` (number): End X coordinate
|
||||||
|
- `endY` (number): End Y coordinate
|
||||||
|
|
||||||
|
- **browser_type**
|
||||||
|
- Description: Type text using the keyboard
|
||||||
|
- Parameters:
|
||||||
|
- `text` (string): Text to type
|
||||||
|
- `submit` (boolean): Whether to submit entered text (press Enter after)
|
||||||
|
|
||||||
|
- **browser_press_key**
|
||||||
|
- Description: Press a key on the keyboard
|
||||||
|
- Parameters:
|
||||||
|
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||||
|
|
||||||
|
- **browser_save_as_pdf**
|
||||||
|
- Description: Save page as PDF
|
||||||
|
- Parameters: None
|
||||||
|
|
||||||
|
- **browser_wait**
|
||||||
|
- Description: Wait for a specified time in seconds
|
||||||
|
- Parameters:
|
||||||
|
- `time` (number): The time to wait in seconds (capped at 10 seconds)
|
||||||
|
|
||||||
|
- **browser_close**
|
||||||
|
- Description: Close the page
|
||||||
|
- Parameters: None
|
||||||
|
|
|
@ -38,7 +38,7 @@ export const screenshot: Tool = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const elementSchema = z.object({
|
const elementSchema = z.object({
|
||||||
element: z.string().describe('Element label, description or any other text to describe the element'),
|
element: z.string().describe('Human-readable element description used to obtain the permission to interact with the element'),
|
||||||
});
|
});
|
||||||
|
|
||||||
const moveMouseSchema = elementSchema.extend({
|
const moveMouseSchema = elementSchema.extend({
|
||||||
|
@ -63,15 +63,22 @@ export const moveMouse: Tool = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const clickSchema = elementSchema.extend({
|
||||||
|
x: z.number().describe('X coordinate'),
|
||||||
|
y: z.number().describe('Y coordinate'),
|
||||||
|
});
|
||||||
|
|
||||||
export const click: Tool = {
|
export const click: Tool = {
|
||||||
schema: {
|
schema: {
|
||||||
name: 'browser_click',
|
name: 'browser_click',
|
||||||
description: 'Click left mouse button',
|
description: 'Click left mouse button',
|
||||||
inputSchema: zodToJsonSchema(elementSchema),
|
inputSchema: zodToJsonSchema(clickSchema),
|
||||||
},
|
},
|
||||||
|
|
||||||
handle: async context => {
|
handle: async (context, params) => {
|
||||||
await runAndWait(context, async page => {
|
await runAndWait(context, async page => {
|
||||||
|
const validatedParams = clickSchema.parse(params);
|
||||||
|
await page.mouse.move(validatedParams.x, validatedParams.y);
|
||||||
await page.mouse.down();
|
await page.mouse.down();
|
||||||
await page.mouse.up();
|
await page.mouse.up();
|
||||||
});
|
});
|
||||||
|
@ -82,8 +89,10 @@ export const click: Tool = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const dragSchema = elementSchema.extend({
|
const dragSchema = elementSchema.extend({
|
||||||
x: z.number().describe('X coordinate'),
|
startX: z.number().describe('Start X coordinate'),
|
||||||
y: z.number().describe('Y coordinate'),
|
startY: z.number().describe('Start Y coordinate'),
|
||||||
|
endX: z.number().describe('End X coordinate'),
|
||||||
|
endY: z.number().describe('End Y coordinate'),
|
||||||
});
|
});
|
||||||
|
|
||||||
export const drag: Tool = {
|
export const drag: Tool = {
|
||||||
|
@ -96,18 +105,20 @@ export const drag: Tool = {
|
||||||
handle: async (context, params) => {
|
handle: async (context, params) => {
|
||||||
const validatedParams = dragSchema.parse(params);
|
const validatedParams = dragSchema.parse(params);
|
||||||
await runAndWait(context, async page => {
|
await runAndWait(context, async page => {
|
||||||
|
await page.mouse.move(validatedParams.startX, validatedParams.startY);
|
||||||
await page.mouse.down();
|
await page.mouse.down();
|
||||||
await page.mouse.move(validatedParams.x, validatedParams.y);
|
await page.mouse.move(validatedParams.endX, validatedParams.endY);
|
||||||
await page.mouse.up();
|
await page.mouse.up();
|
||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
content: [{ type: 'text', text: `Dragged mouse to (${validatedParams.x}, ${validatedParams.y})` }],
|
content: [{ type: 'text', text: `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})` }],
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const typeSchema = z.object({
|
const typeSchema = z.object({
|
||||||
text: z.string().describe('Text to type'),
|
text: z.string().describe('Text to type into the element'),
|
||||||
|
submit: z.boolean().describe('Whether to submit entered text (press Enter after)'),
|
||||||
});
|
});
|
||||||
|
|
||||||
export const type: Tool = {
|
export const type: Tool = {
|
||||||
|
@ -121,7 +132,9 @@ export const type: Tool = {
|
||||||
const validatedParams = typeSchema.parse(params);
|
const validatedParams = typeSchema.parse(params);
|
||||||
await runAndWait(context, async page => {
|
await runAndWait(context, async page => {
|
||||||
await page.keyboard.type(validatedParams.text);
|
await page.keyboard.type(validatedParams.text);
|
||||||
});
|
if (validatedParams.submit)
|
||||||
|
await page.keyboard.press('Enter');
|
||||||
|
}, true);
|
||||||
return {
|
return {
|
||||||
content: [{ type: 'text', text: `Typed text "${validatedParams.text}"` }],
|
content: [{ type: 'text', text: `Typed text "${validatedParams.text}"` }],
|
||||||
};
|
};
|
||||||
|
|
|
@ -35,8 +35,8 @@ export const snapshot: Tool = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const elementSchema = z.object({
|
const elementSchema = z.object({
|
||||||
element: z.string().describe('Element label, description of any other text to describe the element'),
|
element: z.string().describe('Human-readable element description used to obtain the permission to interact with the element'),
|
||||||
ref: z.string().describe('Target element reference'),
|
ref: z.string().describe('Exact target element reference from the page snapshot'),
|
||||||
});
|
});
|
||||||
|
|
||||||
export const click: Tool = {
|
export const click: Tool = {
|
||||||
|
@ -48,7 +48,31 @@ export const click: Tool = {
|
||||||
|
|
||||||
handle: async (context, params) => {
|
handle: async (context, params) => {
|
||||||
const validatedParams = elementSchema.parse(params);
|
const validatedParams = elementSchema.parse(params);
|
||||||
return runAndWait(context, page => refLocator(page, validatedParams).click(), true);
|
return runAndWait(context, page => refLocator(page, validatedParams.ref).click(), true);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const dragSchema = z.object({
|
||||||
|
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
|
||||||
|
startRef: z.string().describe('Exact source element reference from the page snapshot'),
|
||||||
|
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
|
||||||
|
endRef: z.string().describe('Exact target element reference from the page snapshot'),
|
||||||
|
});
|
||||||
|
|
||||||
|
export const drag: Tool = {
|
||||||
|
schema: {
|
||||||
|
name: 'browser_drag',
|
||||||
|
description: 'Perform drag and drop between two elements',
|
||||||
|
inputSchema: zodToJsonSchema(dragSchema),
|
||||||
|
},
|
||||||
|
|
||||||
|
handle: async (context, params) => {
|
||||||
|
const validatedParams = dragSchema.parse(params);
|
||||||
|
return runAndWait(context, async page => {
|
||||||
|
const startLocator = refLocator(page, validatedParams.startRef);
|
||||||
|
const endLocator = refLocator(page, validatedParams.endRef);
|
||||||
|
await startLocator.dragTo(endLocator);
|
||||||
|
}, true);
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -61,7 +85,7 @@ export const hover: Tool = {
|
||||||
|
|
||||||
handle: async (context, params) => {
|
handle: async (context, params) => {
|
||||||
const validatedParams = elementSchema.parse(params);
|
const validatedParams = elementSchema.parse(params);
|
||||||
return runAndWait(context, page => refLocator(page, validatedParams).hover(), true);
|
return runAndWait(context, page => refLocator(page, validatedParams.ref).hover(), true);
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -80,7 +104,7 @@ export const type: Tool = {
|
||||||
handle: async (context, params) => {
|
handle: async (context, params) => {
|
||||||
const validatedParams = typeSchema.parse(params);
|
const validatedParams = typeSchema.parse(params);
|
||||||
return await runAndWait(context, async page => {
|
return await runAndWait(context, async page => {
|
||||||
const locator = refLocator(page, validatedParams);
|
const locator = refLocator(page, validatedParams.ref);
|
||||||
await locator.fill(validatedParams.text);
|
await locator.fill(validatedParams.text);
|
||||||
if (validatedParams.submit)
|
if (validatedParams.submit)
|
||||||
await locator.press('Enter');
|
await locator.press('Enter');
|
||||||
|
@ -88,6 +112,6 @@ export const type: Tool = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
function refLocator(page: playwright.Page, params: z.infer<typeof elementSchema>): playwright.Locator {
|
function refLocator(page: playwright.Page, ref: string): playwright.Locator {
|
||||||
return page.locator(`aria-ref=${params.ref}`);
|
return page.locator(`aria-ref=${ref}`);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue