fix: do not change glob pattern when converting to url (#34923)

This commit is contained in:
Yury Semikhatsky 2025-02-28 13:44:25 -08:00 committed by GitHub
parent 218e4e90aa
commit cd437c972d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 89 additions and 40 deletions

View File

@ -1203,9 +1203,7 @@ Enabling routing disables http cache.
* since: v1.8
- `url` <[string]|[RegExp]|[function]\([URL]\):[boolean]>
A glob pattern, regex pattern or predicate receiving [URL] to match while routing.
When a [`option: Browser.newContext.baseURL`] via the context options was provided and the passed URL is a path,
it gets merged via the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If [`option: Browser.newContext.baseURL`] is set in the context options and the provided URL is a string that does not start with `*`, it is resolved using the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
### param: BrowserContext.route.handler
* since: v1.8

View File

@ -3607,9 +3607,7 @@ Enabling routing disables http cache.
* since: v1.8
- `url` <[string]|[RegExp]|[function]\([URL]\):[boolean]>
A glob pattern, regex pattern or predicate receiving [URL] to match while routing.
When a [`option: Browser.newContext.baseURL`] via the context options was provided and the passed URL is a path,
it gets merged via the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If [`option: Browser.newContext.baseURL`] is set in the context options and the provided URL is a string that does not start with `*`, it is resolved using the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
### param: Page.route.handler
* since: v1.8

View File

@ -706,15 +706,13 @@ Playwright uses simplified glob patterns for URL matching in network interceptio
1. Asterisks:
- A single `*` matches any characters except `/`
- A double `**` matches any characters including `/`
1. Question mark `?` matches any single character except `/`
1. Question mark `?` matches only a literal question mark `?`. If you want to match any character, use `*` instead.
1. Curly braces `{}` can be used to match a list of options separated by commas `,`
1. Square brackets `[]` can be used to match a set of characters
1. Backslash `\` can be used to escape any of the special characters (note that the backslash itself must be escaped as `\\`)
Examples:
- `https://example.com/*.js` matches `https://example.com/file.js` but not `https://example.com/path/file.js`
- `https://example.com/\\?page=1` matches `https://example.com/?page=1` but not `https://example.com`
- `**/v[0-9]*` matches `https://example.com/v1/` but not `https://example.com/vote/`
- `https://example.com/?page=1` matches `https://example.com/?page=1` but not `https://example.com`
- `**/*.js` matches both `https://example.com/file.js` and `https://example.com/path/file.js`
- `**/*.{png,jpg,jpeg}` matches all image requests

View File

@ -3974,9 +3974,9 @@ export interface Page {
*
* **NOTE** Enabling routing disables http cache.
*
* @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. When a
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context
* options was provided and the passed URL is a path, it gets merged via the
* @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the
* context options and the provided URL is a string that does not start with `*`, it is resolved using the
* [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
* @param handler handler function to route the request.
* @param options
@ -9068,9 +9068,9 @@ export interface BrowserContext {
*
* **NOTE** Enabling routing disables http cache.
*
* @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. When a
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context
* options was provided and the passed URL is a path, it gets merged via the
* @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the
* context options and the provided URL is a string that does not start with `*`, it is resolved using the
* [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
* @param handler handler function to route the request.
* @param options

View File

@ -50,15 +50,6 @@ export function globToRegex(glob: string): RegExp {
}
switch (c) {
case '?':
tokens.push('.');
break;
case '[':
tokens.push('[');
break;
case ']':
tokens.push(']');
break;
case '{':
inGroup = true;
tokens.push('(');
@ -101,7 +92,36 @@ export function urlMatches(baseURL: string | undefined, urlString: string, match
// Allow http(s) baseURL to match ws(s) urls.
if (baseURL && /^https?:\/\//.test(baseURL) && /^wss?:\/\//.test(urlString))
baseURL = baseURL.replace(/^http/, 'ws');
match = constructURLBasedOnBaseURL(baseURL, match);
const tokenMap = new Map<string, string>();
function mapToken(original: string, replacement: string) {
if (original.length === 0)
return '';
tokenMap.set(replacement, original);
return replacement;
}
// Escaped `\\?` behaves the same as `?` in our glob patterns.
match = match.replaceAll(/\\\\\?/g, '?');
// Glob symbols may be escaped in the URL and some of them such as ? affect resolution,
// so we replace them with safe components first.
const relativePath = match.split('/').map((token, index) => {
if (token === '.' || token === '..' || token === '')
return token;
// Handle special case of http*://, note that the new schema has to be
// a web schema so that slashes are properly inserted after domain.
if (index === 0 && token.endsWith(':'))
return mapToken(token, 'http:');
const questionIndex = token.indexOf('?');
if (questionIndex === -1)
return mapToken(token, `$_${index}_$`);
const newPrefix = mapToken(token.substring(0, questionIndex), `$_${index}_$`);
const newSuffix = mapToken(token.substring(questionIndex), `?$_${index}_$`);
return newPrefix + newSuffix;
}).join('/');
let resolved = constructURLBasedOnBaseURL(baseURL, relativePath);
for (const [token, original] of tokenMap)
resolved = resolved.replace(token, original);
match = resolved;
}
if (isString(match))
match = globToRegex(match);

View File

@ -3974,9 +3974,9 @@ export interface Page {
*
* **NOTE** Enabling routing disables http cache.
*
* @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. When a
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context
* options was provided and the passed URL is a path, it gets merged via the
* @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the
* context options and the provided URL is a string that does not start with `*`, it is resolved using the
* [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
* @param handler handler function to route the request.
* @param options
@ -9068,9 +9068,9 @@ export interface BrowserContext {
*
* **NOTE** Enabling routing disables http cache.
*
* @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. When a
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context
* options was provided and the passed URL is a path, it gets merged via the
* @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If
* [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the
* context options and the provided URL is a string that does not start with `*`, it is resolved using the
* [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
* @param handler handler function to route the request.
* @param options

View File

@ -16,7 +16,7 @@
*/
import { test as it, expect } from './pageTest';
import { globToRegex } from '../../packages/playwright-core/lib/utils/isomorphic/urlMatch';
import { globToRegex, urlMatches } from '../../packages/playwright-core/lib/utils/isomorphic/urlMatch';
import vm from 'vm';
it('should work with navigation @smoke', async ({ page, server }) => {
@ -76,7 +76,6 @@ it('should work with glob', async () => {
expect(globToRegex('*.js').test('https://localhost:8080/foo.js')).toBeFalsy();
expect(globToRegex('https://**/*.js').test('https://localhost:8080/foo.js')).toBeTruthy();
expect(globToRegex('http://localhost:8080/simple/path.js').test('http://localhost:8080/simple/path.js')).toBeTruthy();
expect(globToRegex('http://localhost:8080/?imple/path.js').test('http://localhost:8080/Simple/path.js')).toBeTruthy();
expect(globToRegex('**/{a,b}.js').test('https://localhost:8080/a.js')).toBeTruthy();
expect(globToRegex('**/{a,b}.js').test('https://localhost:8080/b.js')).toBeTruthy();
expect(globToRegex('**/{a,b}.js').test('https://localhost:8080/c.js')).toBeFalsy();
@ -90,21 +89,56 @@ it('should work with glob', async () => {
expect(globToRegex('http://localhost:3000/signin-oidc*').test('http://localhost:3000/signin-oidc/foo')).toBeFalsy();
expect(globToRegex('http://localhost:3000/signin-oidc*').test('http://localhost:3000/signin-oidcnice')).toBeTruthy();
// range []
expect(globToRegex('**/api/v[0-9]').test('http://example.com/api/v1')).toBeTruthy();
// range [] is NOT supported
expect(globToRegex('**/api/v[0-9]').test('http://example.com/api/v[0-9]')).toBeTruthy();
expect(globToRegex('**/api/v[0-9]').test('http://example.com/api/version')).toBeFalsy();
// query params
expect(globToRegex('**/api\\?param').test('http://example.com/api?param')).toBeTruthy();
expect(globToRegex('**/api\\?param').test('http://example.com/api-param')).toBeFalsy();
expect(globToRegex('**/three-columns/settings.html\\?**id=[a-z]**').test('http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah')).toBeTruthy();
expect(globToRegex('**/three-columns/settings.html\\?**id=settings-**').test('http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah')).toBeTruthy();
expect(globToRegex('\\?')).toEqual(/^\?$/);
expect(globToRegex('\\')).toEqual(/^\\$/);
expect(globToRegex('\\\\')).toEqual(/^\\$/);
expect(globToRegex('\\[')).toEqual(/^\[$/);
expect(globToRegex('[a-z]')).toEqual(/^[a-z]$/);
expect(globToRegex('[a-z]')).toEqual(/^\[a-z\]$/);
expect(globToRegex('$^+.\\*()|\\?\\{\\}\\[\\]')).toEqual(/^\$\^\+\.\*\(\)\|\?\{\}\[\]$/);
expect(urlMatches(undefined, 'http://playwright.dev/', 'http://playwright.dev')).toBeTruthy();
expect(urlMatches(undefined, 'http://playwright.dev/?a=b', 'http://playwright.dev?a=b')).toBeTruthy();
expect(urlMatches(undefined, 'http://playwright.dev/', 'h*://playwright.dev')).toBeTruthy();
expect(urlMatches(undefined, 'http://api.playwright.dev/?x=y', 'http://*.playwright.dev?x=y')).toBeTruthy();
expect(urlMatches(undefined, 'http://playwright.dev/foo/bar', '**/foo/**')).toBeTruthy();
expect(urlMatches('http://playwright.dev', 'http://playwright.dev/?x=y', '?x=y')).toBeTruthy();
expect(urlMatches('http://playwright.dev/foo/', 'http://playwright.dev/foo/bar?x=y', './bar?x=y')).toBeTruthy();
// This is not supported, we treat ? as a query separator.
expect(globToRegex('http://localhost:8080/?imple/path.js').test('http://localhost:8080/Simple/path.js')).toBeFalsy();
expect(urlMatches(undefined, 'http://playwright.dev/', 'http://playwright.?ev')).toBeFalsy();
expect(urlMatches(undefined, 'http://playwright./?ev', 'http://playwright.?ev')).toBeTruthy();
expect(urlMatches(undefined, 'http://playwright.dev/foo', 'http://playwright.dev/f??')).toBeFalsy();
expect(urlMatches(undefined, 'http://playwright.dev/f??', 'http://playwright.dev/f??')).toBeTruthy();
expect(urlMatches(undefined, 'http://playwright.dev/?x=y', 'http://playwright.dev\\?x=y')).toBeTruthy();
expect(urlMatches(undefined, 'http://playwright.dev/?x=y', 'http://playwright.dev/\\?x=y')).toBeTruthy();
expect(urlMatches('http://playwright.dev/foo', 'http://playwright.dev/foo?bar', '?bar')).toBeTruthy();
expect(urlMatches('http://playwright.dev/foo', 'http://playwright.dev/foo?bar', '\\\\?bar')).toBeTruthy();
expect(urlMatches('http://first.host/', 'http://second.host/foo', '**/foo')).toBeTruthy();
expect(urlMatches('http://playwright.dev/', 'http://localhost/', '*//localhost/')).toBeTruthy();
});
// Checks that a route registered with a glob containing `?` intercepts a
// request whose URL carries a query string, and that the fulfilled body is
// what the page receives.
it('should intercept by glob', async function({ page, server, isAndroid }) {
  it.skip(isAndroid);

  await page.goto(server.EMPTY_PAGE);

  // Respond to every request matching the glob with a canned 200 response.
  const cannedResponse = { status: 200, body: 'intercepted' };
  await page.route('http://localhos**?*oo', async route => {
    await route.fulfill(cannedResponse);
  });

  // Fetch from inside the page so the request goes through the routing layer.
  const targetUrl = server.PREFIX + '/?foo';
  const responseText = await page.evaluate(url => fetch(url).then(r => r.text()), targetUrl);
  expect(responseText).toBe('intercepted');
});
it('should intercept network activity from worker', async function({ page, server, isAndroid }) {

View File

@ -71,10 +71,11 @@ it('should unroute', async ({ page, server }) => {
expect(intercepted).toEqual([1]);
});
it('should support ? in glob pattern', async ({ page, server }) => {
it('should not support ? in glob pattern', async ({ page, server }) => {
server.setRoute('/index', (req, res) => res.end('index-no-hello'));
server.setRoute('/index123hello', (req, res) => res.end('index123hello'));
server.setRoute('/index?hello', (req, res) => res.end('index?hello'));
server.setRoute('/index1hello', (req, res) => res.end('index1hello'));
await page.route('**/index?hello', async (route, request) => {
await route.fulfill({ body: 'intercepted any character' });
@ -91,7 +92,7 @@ it('should support ? in glob pattern', async ({ page, server }) => {
expect(await page.content()).toContain('index-no-hello');
await page.goto(server.PREFIX + '/index1hello');
expect(await page.content()).toContain('intercepted any character');
expect(await page.content()).toContain('index1hello');
await page.goto(server.PREFIX + '/index123hello');
expect(await page.content()).toContain('index123hello');