SeleniumBase/seleniumbase/undetected/cdp_driver/config.py

345 lines
12 KiB
Python

import logging
import os
import pathlib
import secrets
import sys
import tempfile
import zipfile
from seleniumbase.config import settings
from typing import Union, List, Optional
__all__ = [
"Config",
"find_chrome_executable",
"temp_profile_dir",
"is_root",
"is_posix",
"PathLike",
]
logger = logging.getLogger(__name__)
is_posix = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
PathLike = Union[str, pathlib.Path]
AUTO = None
class Config:
"""Config object"""
def __init__(
self,
user_data_dir: Optional[PathLike] = AUTO,
headless: Optional[bool] = False,
incognito: Optional[bool] = False,
guest: Optional[bool] = False,
browser_executable_path: Optional[PathLike] = AUTO,
browser_args: Optional[List[str]] = AUTO,
sandbox: Optional[bool] = True,
lang: Optional[str] = "en-US",
host: str = AUTO,
port: int = AUTO,
expert: bool = AUTO,
proxy: Optional[str] = None,
extension_dir: Optional[str] = None,
**kwargs: dict,
):
"""
Creates a config object.
Can be called without any arguments to generate a best-practice config,
which is recommended.
Calling the object, eg: myconfig(), returns the list of arguments which
are provided to the browser.
Additional args can be added using the :py:obj:`~add_argument method`.
Instances of this class are usually not instantiated by end users.
:param user_data_dir: the data directory to use
:param headless: set to True for headless mode
:param browser_executable_path:
Specify browser executable, instead of using autodetect.
:param browser_args: Forwarded to browser executable.
Eg: ["--some-chromeparam=somevalue", "some-other-param=someval"]
:param sandbox: disables sandbox
:param autodiscover_targets: use autodiscovery of targets
:param lang:
Language string to use other than the default "en-US,en;q=0.9"
:param expert: When set to True, "expert" mode is enabled.
This adds: --disable-web-security --disable-site-isolation-trials,
as well as some scripts and patching useful for debugging.
(For example, ensuring shadow-root is always in "open" mode.)
:param kwargs:
:type user_data_dir: PathLike
:type headless: bool
:type browser_executable_path: PathLike
:type browser_args: list[str]
:type sandbox: bool
:type lang: str
:type kwargs: dict
"""
if not browser_args:
browser_args = []
if not user_data_dir:
self._user_data_dir = temp_profile_dir()
self._custom_data_dir = False
else:
self.user_data_dir = user_data_dir
if not browser_executable_path:
browser_executable_path = find_chrome_executable()
self._browser_args = browser_args
self.browser_executable_path = browser_executable_path
self.headless = headless
self.incognito = incognito
self.guest = guest
self.sandbox = sandbox
self.host = host
self.port = port
self.expert = expert
self.proxy = proxy
self.extension_dir = extension_dir
self._extensions = []
# When using posix-ish operating system and running as root,
# you must use no_sandbox=True
if is_posix and is_root() and sandbox:
logger.info("Detected root usage, auto-disabling sandbox mode.")
self.sandbox = False
self.autodiscover_targets = True
self.lang = lang
# Other keyword args will be accessible by attribute
self.__dict__.update(kwargs)
super().__init__()
start_width = settings.CHROME_START_WIDTH
start_height = settings.CHROME_START_HEIGHT
start_x = settings.WINDOW_START_X
start_y = settings.WINDOW_START_Y
self._default_browser_args = [
"--window-size=%s,%s" % (start_width, start_height),
"--window-position=%s,%s" % (start_x, start_y),
"--remote-allow-origins=*",
"--no-first-run",
"--no-service-autorun",
"--disable-auto-reload",
"--no-default-browser-check",
"--homepage=about:blank",
"--no-pings",
"--wm-window-animations-disabled",
"--animation-duration-scale=0",
"--enable-privacy-sandbox-ads-apis",
"--safebrowsing-disable-download-protection",
'--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
"--password-store=basic",
"--deny-permission-prompts",
"--disable-infobars",
"--disable-breakpad",
"--disable-prompt-on-repost",
"--disable-password-generation",
"--disable-ipc-flooding-protection",
"--disable-background-timer-throttling",
"--disable-search-engine-choice-screen",
"--disable-backgrounding-occluded-windows",
"--disable-client-side-phishing-detection",
"--disable-top-sites",
"--disable-translate",
"--disable-renderer-backgrounding",
"--disable-background-networking",
"--disable-dev-shm-usage",
"--disable-features=IsolateOrigins,site-per-process,Translate,"
"InsecureDownloadWarnings,DownloadBubble,DownloadBubbleV2,"
"OptimizationTargetPrediction,OptimizationGuideModelDownloading,"
"SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4",
]
@property
def browser_args(self):
return sorted(self._default_browser_args + self._browser_args)
@property
def user_data_dir(self):
return self._user_data_dir
@user_data_dir.setter
def user_data_dir(self, path: PathLike):
self._user_data_dir = str(path)
self._custom_data_dir = True
@property
def uses_custom_data_dir(self) -> bool:
return self._custom_data_dir
def add_extension(self, extension_path: PathLike):
"""
Adds an extension to load. You can set the extension_path to a
folder (containing the manifest), or an extension zip file (.crx)
:param extension_path:
"""
path = pathlib.Path(extension_path)
if not path.exists():
raise FileNotFoundError(
"Could not find anything here: %s" % str(path)
)
if path.is_file():
tf = tempfile.mkdtemp(
prefix="extension_", suffix=secrets.token_hex(4)
)
with zipfile.ZipFile(path, "r") as z:
z.extractall(tf)
self._extensions.append(tf)
elif path.is_dir():
for item in path.rglob("manifest.*"):
path = item.parent
self._extensions.append(path)
def __call__(self):
# The host and port will be added when starting the browser.
# By the time it starts, the port is probably already taken.
args = self._default_browser_args.copy()
args += ["--user-data-dir=%s" % self.user_data_dir]
args += ["--disable-features=IsolateOrigins,site-per-process"]
args += ["--disable-session-crashed-bubble"]
if self.expert:
args += [
"--disable-web-security",
"--disable-site-isolation-trials",
]
if self.proxy:
args.append("--proxy-server=%s" % self.proxy.split("@")[-1])
args.append("--ignore-certificate-errors")
args.append("--ignore-ssl-errors=yes")
if self.extension_dir:
args.append("--load-extension=%s" % self.extension_dir)
if self._browser_args:
args.extend([arg for arg in self._browser_args if arg not in args])
if self.headless:
args.append("--headless=new")
if self.incognito:
args.append("--incognito")
if self.guest:
args.append("--guest")
if not self.sandbox:
args.append("--no-sandbox")
if self.host:
args.append("--remote-debugging-host=%s" % self.host)
if self.port:
args.append("--remote-debugging-port=%s" % self.port)
return args
def add_argument(self, arg: str):
if any(
x in arg.lower()
for x in [
"headless",
"data-dir",
"data_dir",
"no-sandbox",
"no_sandbox",
"lang",
]
):
raise ValueError(
'"%s" is not allowed. Please use one of the '
'attributes of the Config object to set it.'
% arg
)
self._browser_args.append(arg)
def __repr__(self):
s = f"{self.__class__.__name__}"
for k, v in ({**self.__dict__, **self.__class__.__dict__}).items():
if k[0] == "_":
continue
if not v:
continue
if isinstance(v, property):
v = getattr(self, k)
if callable(v):
continue
s += f"\n\t{k} = {v}"
return s
def is_root():
"""
Helper function to determine if the user is trying to launch chrome
under linux as root, which needs some alternative handling.
"""
import ctypes
import os
try:
return os.getuid() == 0
except AttributeError:
return ctypes.windll.shell32.IsUserAnAdmin() != 0
def temp_profile_dir():
"""Generate a temp dir (path)"""
path = os.path.normpath(tempfile.mkdtemp(prefix="uc_"))
return path
def find_chrome_executable(return_all=False):
"""
Finds the chrome, beta, canary, chromium executable
and returns the disk path.
"""
candidates = []
if is_posix:
for item in os.environ.get("PATH").split(os.pathsep):
for subitem in (
"google-chrome",
"chromium",
"chromium-browser",
"chrome",
"google-chrome-stable",
):
candidates.append(os.sep.join((item, subitem)))
if "darwin" in sys.platform:
candidates += [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Chromium.app/Contents/MacOS/Chromium",
]
else:
for item in map(
os.environ.get,
(
"PROGRAMFILES",
"PROGRAMFILES(X86)",
"LOCALAPPDATA",
"PROGRAMW6432",
),
):
if item is not None:
for subitem in (
"Google/Chrome/Application",
"Google/Chrome Beta/Application",
"Google/Chrome Canary/Application",
):
candidates.append(
os.sep.join((item, subitem, "chrome.exe"))
)
rv = []
for candidate in candidates:
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
logger.debug("%s is a valid candidate... " % candidate)
rv.append(candidate)
else:
logger.debug(
"%s is not a valid candidate because it doesn't exist "
"or isn't an executable."
% candidate
)
winner = None
if return_all and rv:
return rv
if rv and len(rv) > 1:
# Assuming the shortest path wins
winner = min(rv, key=lambda x: len(x))
elif len(rv) == 1:
winner = rv[0]
if winner:
return os.path.normpath(winner)
raise FileNotFoundError(
"Could not find a valid chrome browser binary. "
"Please make sure Chrome is installed. "
"Or use the keyword argument: "
"'browser_executable_path=/path/to/your/browser'."
)