From fa98827fd175063d5245d1161e44e5f90e14d67f Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Fri, 3 Jan 2025 10:48:30 +0100 Subject: [PATCH] fix(backend): Fix validation of hostname-less URLs (#9171) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, `http://` would be converted to `http://http:` (hostname `http`) and pass the no-hostname check that way. It eventually fails validation, but only at the hostname lookup, which times out and therefore takes a very long time. ### Changes 🏗️ - Fix URL canonicalization logic - Merge `_canonicalize_url` into `validate_url` ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] CI --- .../backend/backend/util/request.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/autogpt_platform/backend/backend/util/request.py b/autogpt_platform/backend/backend/util/request.py index 10b716f2c..9a3fa0c5e 100644 --- a/autogpt_platform/backend/backend/util/request.py +++ b/autogpt_platform/backend/backend/util/request.py @@ -33,20 +33,6 @@ ALLOWED_SCHEMES = ["http", "https"] HOSTNAME_REGEX = re.compile(r"^[A-Za-z0-9.-]+$") # Basic DNS-safe hostname pattern -def _canonicalize_url(url: str) -> str: - """ - Normalizes the URL by: - 1. Stripping whitespace and trailing slashes. - 2. Ensuring the scheme is http:// or https:// if missing. - 3. Replacing backslashes with forward slashes. - """ - url = url.strip().strip("/") - if not url.startswith(("http://", "https://")): - url = "http://" + url - url = url.replace("\\", "/") - return url - - def _is_ip_blocked(ip: str) -> bool: """ Checks if the IP address is in a blocked network. @@ -61,9 +47,12 @@ def validate_url(url: str, trusted_origins: list[str]) -> str: to a private, link-local, or otherwise blocked IP address — unless the hostname is explicitly trusted. 
""" - # Normalize/canonicalize input - url = _canonicalize_url(url) + # Canonicalize URL + url = url.strip("/ ").replace("\\", "/") parsed = urlparse(url) + if not parsed.scheme: + url = f"http://{url}" + parsed = urlparse(url) # Check scheme if parsed.scheme not in ALLOWED_SCHEMES: